def test_center_data():
    n_samples = 200
    n_features = 2
    rng = check_random_state(0)
    X = rng.rand(n_samples, n_features)
    y = rng.rand(n_samples)
    expected_X_mean = np.mean(X, axis=0)
    # XXX: currently scaled to variance=n_samples
    expected_X_std = np.std(X, axis=0) * np.sqrt(X.shape[0])
    expected_y_mean = np.mean(y, axis=0)

    Xt, yt, X_mean, y_mean, X_std = center_data(
        X, y, fit_intercept=False, normalize=False)
    assert_array_almost_equal(X_mean, np.zeros(n_features))
    assert_array_almost_equal(y_mean, 0)
    assert_array_almost_equal(X_std, np.ones(n_features))
    assert_array_almost_equal(Xt, X)
    assert_array_almost_equal(yt, y)

    Xt, yt, X_mean, y_mean, X_std = center_data(
        X, y, fit_intercept=True, normalize=False)
    assert_array_almost_equal(X_mean, expected_X_mean)
    assert_array_almost_equal(y_mean, expected_y_mean)
    assert_array_almost_equal(X_std, np.ones(n_features))
    assert_array_almost_equal(Xt, X - expected_X_mean)
    assert_array_almost_equal(yt, y - expected_y_mean)

    Xt, yt, X_mean, y_mean, X_std = center_data(
        X, y, fit_intercept=True, normalize=True)
    assert_array_almost_equal(X_mean, expected_X_mean)
    assert_array_almost_equal(y_mean, expected_y_mean)
    assert_array_almost_equal(X_std, expected_X_std)
    assert_array_almost_equal(Xt, (X - expected_X_mean) / expected_X_std)
    assert_array_almost_equal(yt, y - expected_y_mean)
def test_center_data_weighted():
    n_samples = 200
    n_features = 2
    rng = check_random_state(0)
    X = rng.rand(n_samples, n_features)
    y = rng.rand(n_samples)
    sample_weight = rng.rand(n_samples)
    expected_X_mean = np.average(X, axis=0, weights=sample_weight)
    expected_y_mean = np.average(y, axis=0, weights=sample_weight)

    # XXX: if normalize=True, should we expect a weighted standard deviation?
    #      Currently not weighted, but calculated with respect to the weighted mean.
    # XXX: currently scaled to variance=n_samples
    expected_X_std = (np.sqrt(X.shape[0]) *
                      np.mean((X - expected_X_mean) ** 2, axis=0) ** .5)

    Xt, yt, X_mean, y_mean, X_std = center_data(
        X, y, fit_intercept=True, normalize=False,
        sample_weight=sample_weight)
    assert_array_almost_equal(X_mean, expected_X_mean)
    assert_array_almost_equal(y_mean, expected_y_mean)
    assert_array_almost_equal(X_std, np.ones(n_features))
    assert_array_almost_equal(Xt, X - expected_X_mean)
    assert_array_almost_equal(yt, y - expected_y_mean)

    Xt, yt, X_mean, y_mean, X_std = center_data(
        X, y, fit_intercept=True, normalize=True,
        sample_weight=sample_weight)
    assert_array_almost_equal(X_mean, expected_X_mean)
    assert_array_almost_equal(y_mean, expected_y_mean)
    assert_array_almost_equal(X_std, expected_X_std)
    assert_array_almost_equal(Xt, (X - expected_X_mean) / expected_X_std)
    assert_array_almost_equal(yt, y - expected_y_mean)
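# A minimal sketch (not library code) of the transformation the two tests
# above assert: center by the (optionally weighted) mean, then rescale each
# column so that, per the XXX notes, the normalized data is scaled to
# variance=n_samples rather than 1. `naive_center_data` is a hypothetical name.
import numpy as np

def naive_center_data(X, y, fit_intercept, normalize, sample_weight=None):
    X = np.asarray(X, dtype=float).copy()
    y = np.asarray(y, dtype=float).copy()
    if not fit_intercept:
        return X, y, np.zeros(X.shape[1]), 0., np.ones(X.shape[1])
    X_mean = np.average(X, axis=0, weights=sample_weight)
    y_mean = np.average(y, axis=0, weights=sample_weight)
    X -= X_mean
    y = y - y_mean
    if normalize:
        # unweighted second moment about the (possibly weighted) mean,
        # scaled by sqrt(n_samples) -- matching expected_X_std above
        X_std = np.sqrt(X.shape[0]) * np.sqrt(np.mean(X ** 2, axis=0))
        X_std[X_std == 0] = 1.  # guard against constant columns
        X /= X_std
    else:
        X_std = np.ones(X.shape[1])
    return X, y, X_mean, y_mean, X_std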
def test_randomized_logistic_sparse():
    """Check randomized sparse logistic regression on sparse data."""
    iris = load_iris()
    X = iris.data[:, [0, 2]]
    y = iris.target
    X = X[y != 2]
    y = y[y != 2]

    # center here because sparse matrices are usually not centered
    X, y, _, _, _ = center_data(X, y, True, True)

    X_sp = sparse.csr_matrix(X)

    F, _ = f_classif(X, y)

    scaling = 0.3
    clf = RandomizedLogisticRegression(verbose=False, C=1., random_state=42,
                                       scaling=scaling, n_resampling=50,
                                       tol=1e-3)
    feature_scores = clf.fit(X, y).scores_

    clf = RandomizedLogisticRegression(verbose=False, C=1., random_state=42,
                                       scaling=scaling, n_resampling=50,
                                       tol=1e-3)
    feature_scores_sp = clf.fit(X_sp, y).scores_

    assert_array_equal(feature_scores, feature_scores_sp)
def __init__(self, n_samples, n_features, interval, test_size=0.33,
             normalize=True, centerdata=True,
             transformation=NullTransformation(), fit_intercept=True):
    self.n_samples = n_samples
    self.n_features = n_features
    self.transformation = transformation
    lower = interval[0]
    upper = interval[1]

    random.seed(1)
    data = [np.array([random.uniform(lower, upper) for j in range(n_features)])
            for i in range(n_samples)]
    # materialize the map so it can be indexed (map() is lazy in Python 3)
    Y = list(map(lambda x: self.transformation.transform(x), data))
    self.X = np.row_stack(data)
    self.informative = Y[0][1]
    self.Y = list(map(itemgetter(0), Y))

    XTrain, XTest, YTrain, YTest = train_test_split(
        self.X, self.Y, test_size=test_size, random_state=0)
    self.XTrain_orig = XTrain
    self.XTest_orig = XTest
    self.YTrain_orig = YTrain
    self.YTest_orig = YTest

    if centerdata:
        self.XTrain, self.YTrain, X_mean, y_mean, X_std = center_data(
            XTrain, YTrain, fit_intercept=fit_intercept, normalize=normalize)
        self.XTest, self.YTest = self.center_test(
            XTest, YTest, X_mean, y_mean, X_std)
    else:
        self.XTrain = XTrain
        self.YTrain = YTrain
        self.XTest = XTest
        self.YTest = YTest
def __init__(self, n_samples, n_features, n_informative, normalize_y=False,
             normalize=True, centerdata=True,
             transformation=NullTransformation(), fit_intercept=True):
    self.n_samples = n_samples
    self.n_features = n_features
    X, Y = datasets.make_regression(n_samples=self.n_samples,
                                    n_features=self.n_features,
                                    n_informative=n_informative,
                                    shuffle=False, random_state=11)
    XTrain, XTest, YTrain, YTest = train_test_split(
        X, Y, test_size=0.33, random_state=0)
    self.XTrain_orig = XTrain
    self.XTest_orig = XTest
    self.YTrain_orig = YTrain
    self.YTest_orig = YTest

    if centerdata:
        self.XTrain, YTrain, X_mean, y_mean, X_std = center_data(
            XTrain, YTrain, fit_intercept=fit_intercept, normalize=normalize)
        self.XTest, YTest = self.center_test(
            XTest, YTest, X_mean, y_mean, X_std)
        if normalize_y:
            self.YTrain, self.YTest = self.normalize_labels(YTrain, YTest)
        else:
            self.YTrain = YTrain
            self.YTest = YTest
    else:
        self.XTrain = XTrain
        self.YTrain = YTrain
        self.XTest = XTest
        self.YTest = YTest
    self.transformation = transformation
def fitting(self, XTrain, YTrain, XTest, YTest):
    YTrain_ = np.log(YTrain)
    if np.isnan(YTrain_).any():
        print("log y nan")
        return
    YTest_ = np.log(YTest)
    if np.isnan(YTest_).any():
        print("log y nan")
        return
    XTrain_transf = np.log(XTrain)
    # .any() was missing here: truth-testing a NaN mask array raises ValueError
    if np.isnan(XTrain_transf).any():
        print("log x nan")
        return
    XTest_transf = np.log(XTest)
    if np.isnan(XTest_transf).any():
        print("log x nan")
        return

    ## center the data
    XTrain_transf, YTrain_, X_mean, y_mean, X_std = center_data(
        XTrain_transf, YTrain_, fit_intercept=True, normalize=True)
    XTest_transf, YTest_ = center_test(XTest_transf, YTest_,
                                       X_mean, y_mean, X_std)

    new_loss, _ = compute_lasso(XTrain_transf, YTrain_,
                                XTest_transf, YTest_, score="r2_score")
    print("loss log(y) and log(x):", new_loss)
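# `center_test` is called throughout these snippets but never shown. A minimal
# sketch of what it plausibly does given the call sites: apply the training
# split's mean/std to the held-out split. This is a reconstruction, not the
# original helper; note that some call sites below subtract y_mean again after
# calling it, so the variants in the different repos evidently differ on
# whether y is centered here.
def center_test(XTest, YTest, X_mean, y_mean, X_std, normalize=True):
    XTest = XTest - X_mean
    if normalize:
        XTest = XTest / X_std
    YTest = YTest - y_mean
    return XTest, YTest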
def fit(self, X, y):
    X, _, self.X_mean, _, self.X_std = center_data(X, y, True, True,
                                                   copy=False)
    X_t = self.pca.fit_transform(X)
    evr = numpy.cumsum(self.pca.explained_variance_ratio_)
    self.evr_idx = numpy.where(evr < self.explained_var)[0].max() + 1
    X_t = X_t[:, :(self.evr_idx + 1)]
    print(X.shape, X_t.shape, self.evr_idx)
    self.svr.fit(X_t, y)
def fitting(self, XTrain, YTrain, XTest, YTest, values_TM):
    ## center data
    XTrain_, YTrain_, X_mean, y_mean, X_std = center_data(
        XTrain, YTrain, fit_intercept=True, normalize=True)
    XTest_, YTest_ = center_test(XTest, YTest, X_mean, y_mean, X_std,
                                 normalize=True)

    ## compute linear lasso
    new_loss, beta = compute_lasso(XTrain_, YTrain_, XTest_, YTest_,
                                   score="r2_score", values_TM=values_TM)
    print("linear loss", new_loss)
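# `compute_lasso` is likewise assumed throughout. A hedged reconstruction that
# matches its call signature: fit a lasso on the (pre-centered) train split and
# score the test split. The cross-validated alpha choice is a guess, and
# `values_TM` is accepted but ignored in this sketch.
from sklearn.linear_model import LassoCV
from sklearn.metrics import mean_squared_error, r2_score

def compute_lasso(XTrain, YTrain, XTest, YTest, score="r2_score",
                  values_TM=None):
    # data is already centered/normalized, so no intercept is fitted
    model = LassoCV(fit_intercept=False).fit(XTrain, YTrain)
    y_pred = model.predict(XTest)
    if score == "r2_score":
        loss = r2_score(YTest, y_pred)
    else:
        loss = mean_squared_error(YTest, y_pred)
    return loss, model.coef_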
def _preprocess_data(X, y, fit_intercept, normalize, copy=True,
                     return_mean=False):
    # Back-compat shim routing to the old helpers. Note that `copy` is
    # accepted for signature compatibility but not forwarded.
    if not return_mean:
        return center_data(X, y, fit_intercept, normalize, copy=False)
    else:
        return sparse_center_data(X, y, fit_intercept, normalize)
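# A hedged usage sketch of the shim above: the dense path reproduces
# center_data, while return_mean=True selects the sparse-aware variant that
# reports column means without densifying X. Assumes an old scikit-learn
# (pre-0.18) where center_data / sparse_center_data still exist.
import numpy as np
from scipy import sparse

rng = np.random.RandomState(0)
X_dense = rng.rand(20, 3)
y = rng.rand(20)

Xt, yt, X_mean, y_mean, X_std = _preprocess_data(
    X_dense, y, fit_intercept=True, normalize=True)

X_sparse = sparse.csr_matrix(X_dense)
Xt_sp, yt_sp, X_mean_sp, y_mean_sp, X_std_sp = _preprocess_data(
    X_sparse, y, fit_intercept=True, normalize=True, return_mean=True)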
def __init__(self, dir, filename, centerdata=True, split=0):
    if split:
        Y, X = svm_read_problem(dir + "/" + filename)
        X = self.compute_data(X)
        self.XTrain, self.XTest, self.YTrain, self.YTest = train_test_split(
            X, Y, test_size=0.33, random_state=0)
    else:
        self.YTrain, XTrain_dict = svm_read_problem(
            dir + "/" + filename + "/" + filename + "_train")
        self.YTest, XTest_dict = svm_read_problem(
            dir + "/" + filename + "/" + filename + "_test")
        self.XTrain = self.convert_to_matrix(XTrain_dict)
        self.XTest = self.convert_to_matrix(XTest_dict)

    if centerdata:
        self.XTrain, self.YTrain, X_mean, y_mean, X_std = center_data(
            self.XTrain, self.YTrain, fit_intercept=True, normalize=True)
        self.XTest, self.YTest = self.center_test(
            self.XTest, self.YTest, X_mean, y_mean, X_std)
        self.YTest = self.YTest - y_mean
def fitting(self, XTrain, YTrain, XTest, YTest):
    ### non-linear transformation
    for degree in range(2, 3):
        poly = PolynomialFeatures(degree=degree, include_bias=False)
        XTrain_transf = poly.fit_transform(XTrain)
        XTest_transf = poly.transform(XTest)  # reuse the expansion fitted on train

        ## center the data
        XTrain_transf, YTrain_, X_mean, y_mean, X_std = center_data(
            XTrain_transf, YTrain, fit_intercept=True, normalize=True)
        XTest_transf, YTest_ = center_test(XTest_transf, YTest,
                                           X_mean, y_mean, X_std)

        new_loss, _ = compute_lasso(XTrain_transf, YTrain_,
                                    XTest_transf, YTest_, score="r2_score")
        print("loss for degree-" + str(degree) + " polynomial:", new_loss)
def __init__(self, folder_train, folder_test, label_file,
             start_data_train=["24/08/2012", "23"],
             end_data_train=["31/05/2013", "23"],
             start_data_test=["01/06/2013", "0"],
             end_data_test=["31/12/2013", "23"], centerdata=True):
    files = os.listdir(folder_train)
    X_indexes = [0, 2]
    self.XTrain, self.YTrain = self.extract_data(
        folder_train, files, label_file, start_data_train, end_data_train,
        X_indexes, test_flag=0)
    self.XTest, self.YTest = self.extract_data(
        folder_test, files, label_file, start_data_test, end_data_test,
        X_indexes, test_flag=1)

    if centerdata:
        self.XTrain, self.YTrain, X_mean, y_mean, X_std = center_data(
            self.XTrain, self.YTrain, fit_intercept=True, normalize=True)
        self.XTest, self.YTest = self.center_test(
            self.XTest, self.YTest, X_mean, y_mean, X_std)
        self.YTest = self.YTest - y_mean

    # map each of the 49 keys to its block of 24 hourly indices
    dict_ = dict.fromkeys(np.arange(0, 49), np.array([]))
    for key in list(dict_.keys()):
        dict_[key] = np.arange(key * 24, key * 24 + 24)
    self.dict_ = dict_
def fitting(self, XTrain, YTrain, XTest, YTest):
    YTrain_ = np.sqrt(YTrain)
    if np.isnan(YTrain_).any():
        print("sqrt nan")
        return
    YTest_ = np.sqrt(YTest)
    if np.isnan(YTest_).any():
        print("sqrt nan")
        return

    XTrain_, YTrain_, X_mean, y_mean, X_std = center_data(
        XTrain, YTrain_, fit_intercept=True, normalize=True)
    XTest_, YTest_ = center_test(XTest, YTest_, X_mean, y_mean, X_std)

    new_loss, _ = compute_lasso(XTrain_, YTrain_, XTest_, YTest_,
                                score="r2_score")
    print("loss sqrt(y):", new_loss)
def fitting(self, XTrain, YTrain, XTest, YTest):
    XTrain_transf = 1. / XTrain
    if np.isnan(XTrain_transf).any():
        print("inverse x nan")
        return
    XTest_transf = 1. / np.array(XTest)
    if np.isnan(XTest_transf).any():
        print("inverse x nan")
        return

    XTrain_, YTrain_, X_mean, y_mean, X_std = center_data(
        XTrain_transf, YTrain, fit_intercept=True, normalize=True)
    XTest_, YTest_ = center_test(XTest_transf, YTest, X_mean, y_mean, X_std)

    new_loss, _ = compute_lasso(XTrain_, YTrain_, XTest_, YTest_,
                                score="r2_score")
    print("loss inverse x:", new_loss)
def cross_val(self, X, y, n_fold, n_iter, lambd, model=None):
    """
    Perform general cross-validation.

    :param X: Feature matrix
    :param y: Response
    :param n_fold: how many cross-val runs
    :param n_iter: training iterations
    :param lambd: regularization parameter
    :param model: learning model; *None* means the current GraKeLasso
    :return: average RMSE and average fraction of non-zero coefficients
    """
    X, y, X_mean, y_mean, X_std = center_data(X, y, fit_intercept=True,
                                              normalize=True)
    train_prct = 1 - (n_fold / 100.0)
    # n_rows must be an int: np.ones() rejects a float length
    n_rows = int(np.floor(X.shape[0] * train_prct))
    index = np.ones(n_rows, dtype=bool)
    index = np.concatenate((index,
                            np.zeros(X.shape[0] - n_rows - 1, dtype=bool)))
    avg_error = 0.0
    avg_theta = 0.0
    for i in range(n_fold):
        np.random.shuffle(index)
        new_index = 1 - index
        new_index = np.array(new_index, dtype=bool)
        num_test_examples = sum(new_index)
        if model:
            model.l1_ratio_ = lambd  # if model has this property, i.e. ElasticNet
            model.fit(X[index, :], y[index])
            theta = model.coef_
            y_temp = np.array(y[new_index])
            y_temp.shape = num_test_examples
        else:
            theta = self.train(X[index, :], y[index], lambd, n_iter)
            y_temp = np.array(y[new_index])
            y_temp.shape = (num_test_examples, 1)
            y_temp.shape = num_test_examples
        logging.info("Theta: %s", theta)
        predict = np.dot(X[new_index, :], theta)
        errors = y_temp - predict
        error = np.sqrt(1 / (1.0 * num_test_examples) * sum(np.square(errors)))
        avg_error += error
        avg_theta += 1.0 * (len([c for c in theta if c != 0])) / (1.0 * len(theta))
    avg_theta = avg_theta / (1.0 * n_fold)
    avg_error = avg_error / (1.0 * n_fold)
    return avg_error, avg_theta
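# Hedged usage sketch for cross_val above: pass a scikit-learn ElasticNet as
# `model` so the l1_ratio_ assignment inside the loop applies. The synthetic
# data and the GraKeLasso-style instance `gkl` are hypothetical stand-ins.
import numpy as np
from sklearn.linear_model import ElasticNet

rng = np.random.RandomState(0)
X = rng.rand(100, 10)
y = X[:, 0] - 2 * X[:, 3] + 0.1 * rng.randn(100)

enet = ElasticNet(alpha=0.1)
avg_error, avg_theta = gkl.cross_val(X, y, n_fold=5, n_iter=100,
                                     lambd=0.5, model=enet)
print("avg RMSE:", avg_error, "non-zero coef fraction:", avg_theta)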
def fitting(self, XTrain, YTrain, XTest, YTest):
    XTrain_transf = XTrain
    XTest_transf = XTest

    YTest_transf = Scalar_kernel().transform_y(XTest_transf, YTest)
    YTrain_transf = Scalar_kernel().transform_y(XTrain_transf, YTrain)
    XTrain_transf, dict_ = Scalar_kernel().transform(XTrain_transf)
    XTest_transf, dict_ = Scalar_kernel().transform(XTest_transf)

    ## center the data
    XTrain_transf, YTrain_, X_mean, y_mean, X_std = center_data(
        XTrain_transf, YTrain_transf, fit_intercept=True, normalize=True)
    XTest_transf, YTest_ = center_test(XTest_transf, YTest_transf,
                                       X_mean, y_mean, X_std)

    new_loss, _ = compute_lasso(XTrain_transf, YTrain_,
                                XTest_transf, YTest_, score="r2_score")
    print("enel wind-speed loss:", new_loss)
    return XTrain_transf, XTest_transf, dict_
def path_scores(solver, X, y, mask, alphas, l1_ratios, train, test,
                solver_params, is_classif=False, n_alphas=10, eps=1E-3,
                key=None, debias=False, Xmean=None,
                screening_percentile=20., verbose=1):
    """Compute scores of different alphas in regression and classification;
    used by CV objects.

    Parameters
    ----------
    X : 2D array of shape (n_samples, n_features)
        Design matrix, one row per sample point.

    y : 1D array of length n_samples
        Response vector; one value per sample.

    mask : 3D array of boolean
        Mask defining brain regions that we work on.

    alphas : list of floats
        List of regularization parameters being considered.

    train : array or list of integers
        List of indices for the train samples.

    test : array or list of integers
        List of indices for the test samples.

    l1_ratios : float in the interval [0, 1]; optional (default .5)
        Constant that mixes L1 and TV (resp. Graph-Net) penalization.
        l1_ratios == 0: just smooth. l1_ratios == 1: just lasso.

    eps : float, optional (default 1e-3)
        Length of the path. For example, ``eps=1e-3`` means that
        ``alpha_min / alpha_max = 1e-3``.

    n_alphas : int, optional (default 10)
        Generate this number of alphas per regularization path. This
        parameter is mutually exclusive with the `alphas` parameter.

    solver : function handle
        See for example tv.TVl1Classifier documentation.

    solver_params : dict
        Dictionary of param-value pairs to be passed to solver.
    """
    if l1_ratios is None:
        raise ValueError("l1_ratios must be specified!")

    # misc
    _, n_features = X.shape
    verbose = int(verbose if verbose is not None else 0)

    # Univariate feature screening. Note that if we have only as few as 100
    # features in the mask's support, then we should use all of them to
    # learn the model (i.e. disable this screening).
    do_screening = (n_features > 100) and screening_percentile < 100.
    if do_screening:
        X, mask, support = _univariate_feature_screening(
            X, y, mask, is_classif, screening_percentile)

    # crop the mask to have a tighter bounding box
    mask = _crop_mask(mask)

    # get train and test data
    X_train, y_train = X[train].copy(), y[train].copy()
    X_test, y_test = X[test].copy(), y[test].copy()

    # it is essential to center the data in regression
    X_train, y_train, _, y_train_mean, _ = center_data(
        X_train, y_train, fit_intercept=True, normalize=False, copy=False)

    # misc
    if isinstance(l1_ratios, numbers.Number):
        l1_ratios = [l1_ratios]
    l1_ratios = sorted(l1_ratios)[::-1]  # from large to small l1_ratios
    best_score = -np.inf
    best_secondary_score = -np.inf
    best_l1_ratio = l1_ratios[0]
    best_alpha = None
    best_init = None
    all_test_scores = []
    if len(test) > 0:
        # do l1_ratio path
        for l1_ratio in l1_ratios:
            this_test_scores = []

            # make alpha grid
            if alphas is None:
                alphas_ = _space_net_alpha_grid(
                    X_train, y_train, l1_ratio=l1_ratio, eps=eps,
                    n_alphas=n_alphas, logistic=is_classif)
            else:
                alphas_ = alphas
            alphas_ = sorted(alphas_)[::-1]  # from large to small alphas

            # do alpha path
            if best_alpha is None:
                best_alpha = alphas_[0]
            init = None
            for alpha in alphas_:
                # setup callback mechanism for early stopping
                early_stopper = _EarlyStoppingCallback(
                    X_test, y_test, is_classif=is_classif, debias=debias,
                    verbose=verbose)
                w, _, init = solver(
                    X_train, y_train, alpha, l1_ratio, mask=mask, init=init,
                    callback=early_stopper, verbose=max(verbose - 1, 0.),
                    **solver_params)

                # We use 2 scores for model selection: the second one is to
                # disambiguate between regions of equivalent Spearman
                # correlations
                score, secondary_score = early_stopper.test_score(w)
                this_test_scores.append(score)
                if (np.isfinite(score) and
                        (score > best_score or
                         (score == best_score and
                          secondary_score > best_secondary_score))):
                    best_secondary_score = secondary_score
                    best_score = score
                    best_l1_ratio = l1_ratio
                    best_alpha = alpha
                    best_init = init.copy()
            all_test_scores.append(this_test_scores)
    else:
        if alphas is None:
            alphas_ = _space_net_alpha_grid(
                X_train, y_train, l1_ratio=best_l1_ratio, eps=eps,
                n_alphas=n_alphas, logistic=is_classif)
        else:
            alphas_ = alphas
        best_alpha = alphas_[0]

    # re-fit best model to high precision (i.e. without early stopping, etc.)
    best_w, _, init = solver(X_train, y_train, best_alpha, best_l1_ratio,
                             mask=mask, init=best_init,
                             verbose=max(verbose - 1, 0), **solver_params)
    if debias:
        best_w = _EarlyStoppingCallback(
            X_test, y_test, is_classif=is_classif, debias=debias,
            verbose=verbose)._debias(best_w)

    if len(test) == 0:
        all_test_scores.append(np.nan)

    # unmask univariate screening
    if do_screening:
        w_ = np.zeros(len(support))
        if is_classif:
            w_ = np.append(w_, best_w[-1])
            w_[:-1][support] = best_w[:-1]
        else:
            w_[support] = best_w
        best_w = w_

    if len(best_w) == n_features:
        if Xmean is None:
            Xmean = np.zeros(n_features)
        best_w = np.append(best_w, 0.)

    all_test_scores = np.array(all_test_scores)
    return (all_test_scores, best_w, best_alpha, best_l1_ratio, alphas_,
            y_train_mean, key)
def _dense_fit(self, X, y, Xy=None, coef_init=None):
    # copy was done in fit if necessary
    X, y, X_mean, y_mean, X_std = center_data(
        X, y, self.fit_intercept, self.normalize, copy=False)

    if y.ndim == 1:
        y = y[:, np.newaxis]
    if Xy is not None and Xy.ndim == 1:
        Xy = Xy[:, np.newaxis]

    n_samples, n_features = X.shape
    n_targets = y.shape[1]

    precompute = self.precompute
    if hasattr(precompute, '__array__') \
            and not np.allclose(X_mean, np.zeros(n_features)) \
            and not np.allclose(X_std, np.ones(n_features)):
        # recompute Gram
        precompute = 'auto'
        Xy = None

    coef_ = self._init_coef(coef_init, n_features, n_targets)
    dual_gap_ = np.empty(n_targets)
    eps_ = np.empty(n_targets)

    l1_reg = self.alpha * self.l1_ratio * n_samples
    l2_reg = 0.0  # self.alpha * (1.0 - self.l1_ratio) * n_samples

    # precompute if n_samples > n_features
    if hasattr(precompute, '__array__'):
        Gram = precompute
    elif precompute or (precompute == 'auto' and n_samples > n_features):
        Gram = np.dot(X.T, X)
    else:
        Gram = None

    for k in range(n_targets):
        if Gram is None:
            coef_[k, :], dual_gap_[k], eps_[k] = \
                cd_fast.enet_coordinate_descent(
                    coef_[k, :], l1_reg, l2_reg, X, y[:, k],
                    self.max_iter, self.tol, True)
        else:
            Gram = Gram.copy()
            if Xy is None:
                this_Xy = np.dot(X.T, y[:, k])
            else:
                this_Xy = Xy[:, k]
            coef_[k, :], dual_gap_[k], eps_[k] = \
                cd_fast.enet_coordinate_descent_gram(
                    coef_[k, :], l1_reg, l2_reg, Gram, this_Xy, y[:, k],
                    self.max_iter, self.tol, True)

        if dual_gap_[k] > eps_[k]:
            warnings.warn('Objective did not converge for target %d, '
                          'you might want to increase the number of '
                          'iterations' % k)

    self.coef_, self.dual_gap_, self.eps_ = (np.squeeze(a) for a in
                                             (coef_, dual_gap_, eps_))
    self._set_intercept(X_mean, y_mean, X_std)

    # return self for chaining fit and predict calls
    return self
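# Side note (a hedged check, not part of the class above): the Gram branch is
# valid because coordinate descent only touches X through X^T X and X^T y, so
# precomputing them when n_samples > n_features yields the same solution. The
# public Lasso API exposes the same switch:
import numpy as np
from sklearn.linear_model import Lasso

rng = np.random.RandomState(0)
X = rng.randn(100, 5)
y = X.dot(rng.randn(5)) + 0.01 * rng.randn(100)

coef_plain = Lasso(alpha=0.1, precompute=False).fit(X, y).coef_
coef_gram = Lasso(alpha=0.1, precompute=True).fit(X, y).coef_
np.testing.assert_allclose(coef_plain, coef_gram, atol=1e-4)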
y_test[y_test == 'scissors'] = 1
y_test[y_test == 'scrambledpix'] = -1
y_test = np.array(y_test.astype('double'))

masker = NiftiMasker(mask_strategy='epi', standardize=True)
X_train = masker.fit_transform(X_train)
X_test = masker.transform(X_test)
mask = masker.mask_img_.get_data().astype(np.bool)
mask = _crop_mask(mask)
background_img = mean_img(data_files.func[0])

X_train, y_train, _, y_train_mean, _ = center_data(
    X_train, y_train, fit_intercept=True, normalize=False, copy=False)
X_test -= X_train.mean(axis=0)
X_test /= np.std(X_train, axis=0)

alpha = 1
ratio = 0.5
k = 200
solver_params = dict(tol=1e-6, max_iter=5000, prox_max_iter=100)

init = None
w, obj, init = tvksp_solver(X_train, y_train, alpha, ratio, k, mask=mask,
                            init=init, loss="logistic", verbose=1,
                            **solver_params)
coef = w[:-1]
intercept = w[-1]

coef_img = masker.inverse_transform(coef)
y_pred = np.sign(X_test.dot(coef) + intercept)
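# Hedged follow-up: with the labels coded as +/-1 above, a simple accuracy
# check for the decoder could look like this (assumes y_test as built above):
accuracy = np.mean(y_pred == y_test)
print("accuracy:", accuracy)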
print("transformation done") X_transf, output_dict = enel_transf.transformPerTurbineLevel( dict_sample_turb, enel_dict, X, power_curve, X_transf, output_dict ) print("transformation per turbine done") XTrain_transf = X_transf[: XTrain.shape[0], :] XTest_transf = X_transf[XTrain.shape[0] :, :] ##center data XTrain_noCenter, XVal_noCenter, YTrain_noCenter, YVal_noCenter = train_test_split( XTrain_transf, YTrain, test_size=0.33, random_state=0 ) XTrain_, YTrain_, X_mean, y_mean, X_std = center_data( XTrain_noCenter, YTrain_noCenter, fit_intercept=True, normalize=True ) XVal_, YVal_ = center_test(XVal_noCenter, YVal_noCenter, X_mean, y_mean, X_std) values_TM = [] start_loss, _ = compute_lasso(XTrain_, YTrain_, XVal_, YVal_, score="mean_squared_error", values_TM=[]) print("loss", start_loss) n_features_transf = XTrain_.shape[1] ####generation blocks r = np.random.RandomState(11) r1 = np.random.RandomState(12) r2 = np.random.RandomState(13) r4 = np.random.RandomState(15)
def test_deprecation_center_data():
    n_samples = 200
    n_features = 2

    w = 1.0 + rng.rand(n_samples)
    X = rng.rand(n_samples, n_features)
    y = rng.rand(n_samples)

    # materialize the grid: it is iterated twice below, and product()
    # returns a one-shot iterator
    param_grid = list(product([True, False], [True, False], [True, False],
                              [None, w]))

    # note: the unpacked names below don't match the (X_mean, y_mean, X_std)
    # return order, but both helpers are unpacked positionally, so the
    # comparisons remain consistent
    for (fit_intercept, normalize, copy, sample_weight) in param_grid:
        XX = X.copy()  # such that we can try copy=False as well

        X1, y1, X1_mean, X1_var, y1_mean = \
            center_data(XX, y, fit_intercept=fit_intercept,
                        normalize=normalize, copy=copy,
                        sample_weight=sample_weight)

        XX = X.copy()

        X2, y2, X2_mean, X2_var, y2_mean = \
            _preprocess_data(XX, y, fit_intercept=fit_intercept,
                             normalize=normalize, copy=copy,
                             sample_weight=sample_weight)

        assert_array_almost_equal(X1, X2)
        assert_array_almost_equal(y1, y2)
        assert_array_almost_equal(X1_mean, X2_mean)
        assert_array_almost_equal(X1_var, X2_var)
        assert_array_almost_equal(y1_mean, y2_mean)

    # Sparse cases
    X = sparse.csr_matrix(X)

    for (fit_intercept, normalize, copy, sample_weight) in param_grid:
        X1, y1, X1_mean, X1_var, y1_mean = \
            center_data(X, y, fit_intercept=fit_intercept,
                        normalize=normalize, copy=copy,
                        sample_weight=sample_weight)

        X2, y2, X2_mean, X2_var, y2_mean = \
            _preprocess_data(X, y, fit_intercept=fit_intercept,
                             normalize=normalize, copy=copy,
                             sample_weight=sample_weight,
                             return_mean=False)

        assert_array_almost_equal(X1.toarray(), X2.toarray())
        assert_array_almost_equal(y1, y2)
        assert_array_almost_equal(X1_mean, X2_mean)
        assert_array_almost_equal(X1_var, X2_var)
        assert_array_almost_equal(y1_mean, y2_mean)

    for (fit_intercept, normalize) in product([True, False], [True, False]):
        X1, y1, X1_mean, X1_var, y1_mean = \
            sparse_center_data(X, y, fit_intercept=fit_intercept,
                               normalize=normalize)

        X2, y2, X2_mean, X2_var, y2_mean = \
            _preprocess_data(X, y, fit_intercept=fit_intercept,
                             normalize=normalize, return_mean=True)

        assert_array_almost_equal(X1.toarray(), X2.toarray())
        assert_array_almost_equal(y1, y2)
        assert_array_almost_equal(X1_mean, X2_mean)
        assert_array_almost_equal(X1_var, X2_var)
        assert_array_almost_equal(y1_mean, y2_mean)