def test_stratified_binary_classification():
    X = cp.array([[0.37487513, -2.3031888, 1.662633, 0.7671007],
                  [-0.49796826, -1.0621182, -0.32518214, -0.20583323],
                  [-1.0104885, -2.4997945, 2.8952584, 1.4712684],
                  [2.008748, -2.4520662, 0.5557737, 0.07749569],
                  [0.97350526, -0.3403474, -0.58081895, -0.23199573]])

    # Needs to fail when we have only one occurrence of a label
    y = cp.array([0, 0, 0, 0, 1])
    with pytest.raises(ValueError):
        train_test_split(X, y, train_size=0.75, stratify=y, shuffle=True)

    y = cp.array([0, 0, 0, 1, 1])
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, train_size=0.75, stratify=y, random_state=15)

    _, y_counts = cp.unique(y, return_counts=True)
    _, train_counts = cp.unique(y_train, return_counts=True)
    _, test_counts = cp.unique(y_test, return_counts=True)

    # Ensure the total count of each label is preserved across the split
    cp.testing.assert_array_equal(train_counts + test_counts, y_counts)
def train_test_split(X, y=None, shuffle=True, random_state=None,
                     stratify=None, **kwargs):
    # Object-dtype labels need special handling before splitting
    if y is not None and str(y.dtype) == 'object':
        return _train_test_split_with_object(
            X, y, shuffle=shuffle, random_state=random_state,
            stratify=stratify, **kwargs)
    try:
        return cm_sel.train_test_split(
            X, y, shuffle=shuffle, random_state=random_state,
            stratify=stratify, **kwargs)
    except Exception as e:
        # A stratified split can fail on the GPU with cudaErrorInvalidValue;
        # fall back to an unstratified split in that case.
        if stratify is not None and 'cudaErrorInvalidValue' in str(e):
            logger.warning('train_test_split failed, retrying without stratify')
            return cm_sel.train_test_split(
                X, y, shuffle=shuffle, random_state=random_state, **kwargs)
        raise
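# A minimal usage sketch for the wrapper above (a hypothetical call, assuming
# `cm_sel` is cuml.model_selection and `logger` is a logging.Logger). If the
# GPU rejects the stratified split with cudaErrorInvalidValue, the wrapper
# logs a warning and falls back to an unstratified split.
import cupy as cp

X_demo = cp.random.rand(100, 4)
y_demo = cp.array([0] * 50 + [1] * 50)
X_tr, X_te, y_tr, y_te = train_test_split(
    X_demo, y_demo, train_size=0.8, stratify=y_demo, random_state=0)
assert X_tr.shape[0] == 80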
def test_random_state(seed_type):
    for i in range(10):
        seed_n = np.random.randint(0, int(1e9))
        if seed_type == 'int':
            seed = seed_n
        if seed_type == 'cupy':
            seed = cp.random.RandomState(seed=seed_n)
        if seed_type == 'numpy':
            seed = np.random.RandomState(seed=seed_n)

        X = cudf.DataFrame({"x": range(100)})
        y = cudf.Series(([0] * (100 // 2)) + ([1] * (100 // 2)))

        X_train, X_test, y_train, y_test = train_test_split(
            X, y, random_state=seed)

        # Recreate the stateful seeds so the second split sees the same state
        if seed_type == 'cupy':
            seed = cp.random.RandomState(seed=seed_n)
        if seed_type == 'numpy':
            seed = np.random.RandomState(seed=seed_n)

        X_train2, X_test2, y_train2, y_test2 = train_test_split(
            X, y, random_state=seed)

        assert X_train.equals(X_train2)
        assert X_test.equals(X_test2)
        assert y_train.equals(y_train2)
        assert y_test.equals(y_test2)
def test_split_dataframe_array(y_type):
    X = cudf.DataFrame({"x": range(100)})
    y = cudf.Series(([0] * (100 // 2)) + ([1] * (100 // 2)))
    if y_type == "cupy":
        X_train, X_test, y_train, y_test = train_test_split(X, y.values)
        assert isinstance(X_train, cudf.DataFrame)
        assert isinstance(X_test, cudf.DataFrame)
        assert isinstance(y_train, cp.ndarray)
        assert isinstance(y_test, cp.ndarray)
    elif y_type == "cudf":
        X_train, X_test, y_train, y_test = train_test_split(X, y)
        assert isinstance(X_train, cudf.DataFrame)
        assert isinstance(X_test, cudf.DataFrame)
        assert isinstance(y_train, cudf.Series)
        assert isinstance(y_test, cudf.Series)
def make_classification_dataset(datatype, nrows, ncols, nclasses):
    n_real_features = min(ncols,
                          int(max(nclasses * 2, math.ceil(ncols / 10))))
    n_clusters_per_class = min(2, max(1, int(2**n_real_features / nclasses)))
    n_redundant = min(ncols - n_real_features,
                      max(2, math.ceil(ncols / 20)))

    try:
        X, y = data.make_classification(
            dtype=datatype,
            n_samples=nrows + 1000,
            n_features=ncols,
            random_state=SEED,
            class_sep=1.0,
            n_informative=n_real_features,
            n_clusters_per_class=n_clusters_per_class,
            n_redundant=n_redundant,
            n_classes=nclasses)

        r = dsel.train_test_split(X, y, random_state=SEED, train_size=nrows)

        if len(cp.unique(r[2])) < nclasses:
            raise ValueError("Training data does not have all classes.")

        return r
    except ValueError:
        pytest.skip("Skipping the test for invalid combination of "
                    "ncols/nclasses")
def train_and_eval(X_param, y_param, penalty='l2', C=1.0, l1_ratio=None,
                   fit_intercept=True):
    """
    Splits the given data into train and test sets, then trains and
    evaluates a logistic regression model with the given parameters.

    Parameters
    ----------
    X_param: DataFrame.
             The data to use for training and testing.
    y_param: Series.
             The labels for training.
    penalty, C, l1_ratio, fit_intercept: The parameter values for
             Logistic Regression.

    Returns
    -------
    score: log loss of the fitted model
    """
    X_train, X_valid, y_train, y_valid = train_test_split(X_param, y_param,
                                                          random_state=42)
    classifier = LogisticRegression(penalty=penalty, C=C, l1_ratio=l1_ratio,
                                    fit_intercept=fit_intercept)
    classifier.fit(X_train, y_train)
    y_pred = classifier.predict(X_valid)
    score = log_loss(y_valid, y_pred)
    return score
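# A minimal usage sketch for train_and_eval on hypothetical toy data,
# assuming cuml's LogisticRegression, log_loss, and train_test_split are in
# scope as in the function above.
import cudf
import numpy as np

rng = np.random.default_rng(0)
X_demo = cudf.DataFrame({"a": rng.random(100), "b": rng.random(100)})
y_demo = cudf.Series([0.0] * 50 + [1.0] * 50)
score = train_and_eval(X_demo, y_demo, penalty='l2', C=0.5)
print("log loss:", score)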
def test_split_array_single_argument(type, test_size, train_size, shuffle):
    X = np.zeros((100, 10)) + np.arange(100).reshape(100, 1)
    if type == 'cupy':
        X = cp.asarray(X)
    if type == 'numba':
        X = cuda.to_device(X)

    X_train, X_test = train_test_split(X,
                                       train_size=train_size,
                                       test_size=test_size,
                                       shuffle=shuffle,
                                       random_state=0)

    if type == 'cupy':
        assert isinstance(X_train, cp.ndarray)
        assert isinstance(X_test, cp.ndarray)

    if type in ['numba', 'rmm']:
        assert cuda.devicearray.is_cuda_ndarray(X_train)
        assert cuda.devicearray.is_cuda_ndarray(X_test)

    if train_size is not None:
        assert X_train.shape[0] == int(X.shape[0] * train_size)

    if test_size is not None:
        assert X_test.shape[0] == int(X.shape[0] * test_size)

    if shuffle is None:
        # Without shuffling, the split must preserve the original row order
        assert (X_train == X[0:X_train.shape[0]]).all()
        assert (X_test == X[-X_test.shape[0]:]).all()

        X_rec = cp.sort(cp.concatenate([X_train, X_test]), axis=0)
        assert (X_rec == X).all()
def test_pipeline():
    X, y = make_classification(random_state=0)
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
    pipe = Pipeline(steps=[('scaler', StandardScaler()), ('svc', SVC())])
    pipe.fit(X_train, y_train)
    score = pipe.score(X_test, y_test)
    assert score > 0.8
def test_split_column():
    data = cudf.DataFrame(
        {
            "x": range(100),
            "y": ([0] * (100 // 2)) + ([1] * (100 // 2)),
        }
    )
    train_size = 0.8

    X_train, X_test, y_train, y_test = train_test_split(
        data, "y", train_size=train_size
    )

    assert (
        len(X_train) == len(y_train) == pytest.approx(train_size * len(data))
    )
    assert (
        len(X_test) == len(y_test) == pytest.approx((1 - train_size) * len(data))
    )

    X_reconstructed = cudf.concat([X_train, X_test]).sort_values(by=["x"])
    y_reconstructed = y_train.append(y_test).sort_values()

    assert all(
        data == X_reconstructed.assign(y=y_reconstructed).reset_index(drop=True)
    )
def test_sklearn_search():
    """Test ensures the scoring function works with sklearn machinery"""
    import numpy as np
    from cuml import Ridge as cumlRidge
    import cudf
    from sklearn import datasets
    from sklearn.model_selection import train_test_split, GridSearchCV

    diabetes = datasets.load_diabetes()
    X_train, X_test, y_train, y_test = train_test_split(diabetes.data,
                                                        diabetes.target,
                                                        test_size=0.2,
                                                        shuffle=False,
                                                        random_state=1)

    alpha = np.array([1.0])
    fit_intercept = True
    normalize = False

    params = {'alpha': np.logspace(-3, -1, 10)}
    cu_clf = cumlRidge(alpha=alpha, fit_intercept=fit_intercept,
                       normalize=normalize, solver="eig")

    assert getattr(cu_clf, 'score', False)

    sk_cu_grid = GridSearchCV(cu_clf, params, cv=5, iid=False)

    gdf_data = cudf.DataFrame(X_train)
    gdf_train = cudf.DataFrame(dict(train=y_train))

    sk_cu_grid.fit(gdf_data, gdf_train.train)

    assert sk_cu_grid.best_params_ == {'alpha': 0.1}
def make_regression_dataset(datatype, nrows, ncols):
    ninformative = max(min(ncols, 5), int(math.ceil(ncols / 5)))

    X, y = data.make_regression(dtype=datatype,
                                n_samples=nrows + 1000,
                                n_features=ncols,
                                random_state=SEED,
                                n_informative=ninformative)

    return dsel.train_test_split(X, y, random_state=SEED, train_size=nrows)
def test_split_df_single_argument(test_size, train_size, shuffle):
    X = cudf.DataFrame({'x': range(50)})
    X_train, X_test = train_test_split(X,
                                       train_size=train_size,
                                       test_size=test_size,
                                       shuffle=shuffle,
                                       random_state=0)
    if train_size is not None:
        assert X_train.shape[0] == int(X.shape[0] * train_size)

    if test_size is not None:
        assert X_test.shape[0] == int(X.shape[0] * test_size)
def load_data(fpath):
    """
    Simple helper function for loading data to be used by CPU/GPU models.

    :param fpath: Path to the data to be ingested
    :return: train/test split of the data at [fpath], as produced by
             train_test_split on a RAPIDS (cuDF) DataFrame
    """
    import cudf

    df = cudf.read_parquet(fpath)
    X = df.drop(["ArrDelayBinary"], axis=1)
    y = df["ArrDelayBinary"].astype("int32")

    return train_test_split(X, y, test_size=0.2)
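# A minimal usage sketch for load_data; "airline.parquet" is a hypothetical
# path to a parquet file containing an "ArrDelayBinary" label column.
X_train, X_test, y_train, y_test = load_data("airline.parquet")
print(X_train.shape, X_test.shape)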
def test_stratified_random_seed(seed_type):
    for i in range(10):
        seed_n = np.random.randint(0, int(1e9))
        if seed_type == 'int':
            seed = seed_n
        if seed_type == 'cupy':
            seed = cp.random.RandomState(seed=seed_n)
        if seed_type == 'numpy':
            seed = np.random.RandomState(seed=seed_n)

        X = cudf.DataFrame({"x": range(100)})
        y = cudf.Series(([0] * (100 // 2)) + ([1] * (100 // 2)))

        X_train, X_test, y_train, y_test = train_test_split(
            X, y, random_state=seed, stratify=y)

        # Recreate the stateful seeds so the second split sees the same state
        if seed_type == 'cupy':
            seed = cp.random.RandomState(seed=seed_n)
        if seed_type == 'numpy':
            seed = np.random.RandomState(seed=seed_n)

        X_train2, X_test2, y_train2, y_test2 = train_test_split(
            X, y, random_state=seed, stratify=y)

        assert X_train.equals(X_train2)
        assert X_test.equals(X_test2)
        assert y_train.equals(y_train2)
        assert y_test.equals(y_test2)

        # Ensure that the data was actually shuffled
        assert not (X.head().index.values ==
                    X_train.head().index.values).all()

        def monotonic_inc(x):
            dx = cp.diff(x.values, axis=0)
            return cp.all(dx == 1)

        assert not monotonic_inc(X_train)
def _preprocess_data(self, train_data, labels, batch_size, train_size,
                     truncate):
    train_gdf = cudf.DataFrame()
    train_gdf["domain"] = train_data
    train_gdf["type"] = labels

    domain_train, domain_test, type_train, type_test = train_test_split(
        train_gdf, "type", train_size=train_size)

    test_df = self._create_df(domain_test, type_test)
    train_df = self._create_df(domain_train, type_train)

    test_dataset = DGADataset(test_df, truncate)
    train_dataset = DGADataset(train_df, truncate)

    test_dataloader = DataLoader(test_dataset, batchsize=batch_size)
    train_dataloader = DataLoader(train_dataset, batchsize=batch_size)

    return train_dataloader, test_dataloader
def test_stratify_retain_index(test_size, train_size):
    X = cudf.DataFrame({"x": range(10)})
    y = cudf.Series(([0] * (10 // 2)) + ([1] * (10 // 2)))

    X_train, X_test, y_train, y_test = train_test_split(X,
                                                        y,
                                                        train_size=train_size,
                                                        test_size=test_size,
                                                        shuffle=True,
                                                        stratify=True)
    assert (X_train["x"] == X_train.index).all()
    assert (X_test["x"] == X_test.index).all()

    if train_size is not None:
        assert X_train.shape[0] == int(X.shape[0] * train_size)
    elif test_size is not None:
        assert X_test.shape[0] == int(X.shape[0] * test_size)
def split_dataset(self, dataset, random_state):
    """
    Split dataset into train and test data subsets, currently using
    the CV-fold index for randomness. Plan to refactor with sklearn KFold.
    """
    hpo_log.info('> train-test split')
    label_column = self.hpo_config.label_column

    X_train, X_test, y_train, y_test = train_test_split(
        dataset, label_column, random_state=random_state)

    return (X_train.astype(self.hpo_config.dataset_dtype),
            X_test.astype(self.hpo_config.dataset_dtype),
            y_train.astype(self.hpo_config.dataset_dtype),
            y_test.astype(self.hpo_config.dataset_dtype))
def test_default_values():
    X = np.zeros((100, 10)) + np.arange(100).reshape(100, 1)
    y = np.arange(100).reshape(100, 1)

    X = cp.asarray(X)
    y = cp.asarray(y)

    X_train, X_test, y_train, y_test = train_test_split(X, y)

    assert isinstance(X_train, cp.ndarray)
    assert isinstance(X_test, cp.ndarray)
    assert isinstance(y_train, cp.ndarray)
    assert isinstance(y_test, cp.ndarray)

    # The default split is 75% train / 25% test
    assert X_train.shape[0] == X.shape[0] * 0.75
    assert y_train.shape[0] == y.shape[0] * 0.75
    assert X_test.shape[0] == X.shape[0] * 0.25
    assert y_test.shape[0] == y.shape[0] * 0.25
def load_data(fpath):
    """
    Simple helper function for loading data to be used by CPU/GPU models.

    :param fpath: Path to the data to be ingested
    :return: train/test split of the data at [fpath], as produced by
             train_test_split on a RAPIDS (cuDF) DataFrame
    """
    import cudf
    import gcsfs

    if fpath.startswith('gs://'):
        fs = gcsfs.GCSFileSystem()
        with fs.open(fpath, mode='rb') as f:
            df = cudf.read_parquet(f)
    else:
        df = cudf.read_parquet(fpath)

    X = df.drop(["ArrDelayBinary"], axis=1)
    y = df["ArrDelayBinary"].astype("int32")

    return train_test_split(X, y, test_size=0.2)
def test_stratify_any_input(test_size, train_size):
    X = cudf.DataFrame({"x": range(10)})
    X['test_col'] = cudf.Series([10, 0, 0, 10, 10, 10, 0, 0, 10, 10])
    y = cudf.Series(([0] * (10 // 2)) + ([1] * (10 // 2)))

    X_train, X_test, y_train, y_test = train_test_split(
        X,
        y,
        train_size=train_size,
        test_size=test_size,
        shuffle=True,
        stratify=X['test_col'],
        random_state=15)

    assert (X_train["x"].to_numpy() == X_train.index.to_numpy()).all()
    assert (X_test["x"].to_numpy() == X_test.index.to_numpy()).all()

    if train_size is not None:
        assert X_train.shape[0] == int(X.shape[0] * train_size)
    elif test_size is not None:
        assert X_test.shape[0] == int(X.shape[0] * test_size)
def test_split_dataframe(train_size, shuffle):
    X = cudf.DataFrame({"x": range(100)})
    y = cudf.Series(([0] * (100 // 2)) + ([1] * (100 // 2)))

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, train_size=train_size, shuffle=shuffle)

    assert len(X_train) == len(y_train) == pytest.approx(train_size * len(X))
    assert (len(X_test) == len(y_test) ==
            pytest.approx((1 - train_size) * len(X)))
    assert all(X_train.index.to_pandas() == y_train.index.to_pandas())
    assert all(X_test.index.to_pandas() == y_test.index.to_pandas())

    X_reconstructed = cudf.concat([X_train, X_test]).sort_values(by=["x"])
    y_reconstructed = y_train.append(y_test).sort_values()

    assert all(X_reconstructed.reset_index(drop=True) == X)
    out = y_reconstructed.reset_index(drop=True).values_host == y.values_host
    assert all(out)
def test_train_model():
    if torch.cuda.is_available():
        fake = Faker()
        email_col = [fake.text() for _ in range(200)]
        label_col = [random.randint(0, 1) for _ in range(200)]
        emails_gdf = cudf.DataFrame(list(zip(email_col, label_col)),
                                    columns=["email", "label"])

        X_train, X_test, y_train, y_test = train_test_split(
            emails_gdf, "label", train_size=0.8, random_state=10
        )
        sc.train_model(
            X_train["email"],
            y_train,
            learning_rate=3e-5,
            max_seq_len=128,
            batch_size=6,
            epochs=1,
        )
        assert isinstance(
            sc._model.module,
            transformers.models.bert.modeling_bert.BertForSequenceClassification,
        )
def test_stratified_split(type, test_size, train_size):
    # Use a large sample size for more tolerant and reliable estimates
    X, y = make_classification(n_samples=10000)

    if type == 'cupy':
        X = cp.asarray(X)
        y = cp.asarray(y)

    if type == 'numba':
        X = cuda.to_device(X)
        y = cuda.to_device(y)

    def counts(y):
        # Per-class proportions, rounded to two decimals
        _, y_indices = cp.unique(y, return_inverse=True)
        class_counts = cp.bincount(y_indices)
        total = cp.sum(class_counts)
        percent_counts = []
        for count in class_counts:
            percent_counts.append(
                cp.around(float(count) / total.item(), decimals=2).item())
        return percent_counts

    X_train, X_test, y_train, y_test = train_test_split(X,
                                                        y,
                                                        train_size=train_size,
                                                        test_size=test_size,
                                                        stratify=y)

    original_counts = counts(y)
    split_counts = counts(y_train)

    # The stratified split should preserve the class proportions
    assert cp.isclose(original_counts, split_counts,
                      equal_nan=False, rtol=0.1).all()

    if type == 'cupy':
        assert isinstance(X_train, cp.ndarray)
        assert isinstance(X_test, cp.ndarray)

    if type in ['numba']:
        assert cuda.devicearray.is_cuda_ndarray(X_train)
        assert cuda.devicearray.is_cuda_ndarray(X_test)
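# A minimal standalone sketch of the behavior tested above, assuming
# cuml.model_selection.train_test_split; the 90/10 class imbalance is
# hypothetical. Stratifying on y keeps roughly the same class ratio in the
# train partition as in the full dataset.
import cupy as cp
from cuml.model_selection import train_test_split

X_imb = cp.random.rand(1000, 3)
y_imb = cp.array([0] * 900 + [1] * 100)
_, _, y_tr, _ = train_test_split(X_imb, y_imb, train_size=0.8,
                                 stratify=y_imb, random_state=0)
frac_zero = float((y_tr == 0).sum()) / y_tr.shape[0]
print(frac_zero)  # expected to be close to 0.9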
def test_hinge_loss(nrows, ncols, n_info, input_type, n_classes):
    train_rows = np.int32(nrows * 0.8)
    X, y = make_classification(n_samples=nrows, n_features=ncols,
                               n_clusters_per_class=1, n_informative=n_info,
                               random_state=123, n_classes=n_classes)

    if input_type == "cudf":
        X = cudf.DataFrame(X)
        y = cudf.Series(y)
    elif input_type == "cupy":
        X = cp.asarray(X)
        y = cp.asarray(y)

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, train_size=train_rows, shuffle=True)
    cuml_model = cu_log()
    cuml_model.fit(X_train, y_train)
    cu_predict_decision = cuml_model.decision_function(X_test)
    cu_loss = cuml_hinge(y_test, cu_predict_decision.T, labels=cp.unique(y))

    if input_type == "cudf":
        y_test = y_test.to_array()
        y = y.to_array()
        cu_predict_decision = cp.asnumpy(cu_predict_decision.values)
    elif input_type == "cupy":
        y = cp.asnumpy(y)
        y_test = cp.asnumpy(y_test)
        cu_predict_decision = cp.asnumpy(cu_predict_decision)

    cu_loss_using_sk = sk_hinge(y_test, cu_predict_decision.T,
                                labels=np.unique(y))

    # Compare cuml's hinge loss against sklearn's
    cp.testing.assert_array_almost_equal(cu_loss, cu_loss_using_sk)
# Assumed import sources for this script excerpt: Run and ExplanationClient
# come from the AzureML SDK, and train_test_split from cuml.
from azureml.core.run import Run
from azureml.interpret import ExplanationClient

import joblib
import os

import cuml
from cuml.benchmark.datagen import load_higgs
from cuml.model_selection import train_test_split

OUTPUT_DIR = './outputs/'
os.makedirs(OUTPUT_DIR, exist_ok=True)

X, y = load_higgs()
N_ROWS = 1000000

run = Run.get_context()
client = ExplanationClient.from_run(run)
run.log('N_ROWS', N_ROWS)

X_train, X_test, y_train, y_test = train_test_split(X[:N_ROWS], y[:N_ROWS],
                                                    random_state=1)

# Write X_test out as a pickle file for later visualization
x_test_pkl = 'x_test.pkl'
with open(x_test_pkl, 'wb') as file:
    joblib.dump(value=X_test, filename=os.path.join(OUTPUT_DIR, x_test_pkl))
run.upload_file('x_test_higgs.pkl', os.path.join(OUTPUT_DIR, x_test_pkl))

gamma = 0.001
C = 100.

# Use the SVC algorithm to create a model
reg = cuml.svm.SVC(C=C, gamma=gamma, probability=True)
model = reg.fit(X_train, y_train)
# preds = reg.predict(X_test)
def train_model(self, train_gdf, cat_cols, cont_cols, label_col,
                batch_size, epochs, lr=0.01, wd=0.0):
    """
    This function is used for training a fastai tabular model with a given
    training dataset.

    :param train_gdf: training dataset with categorical and/or continuous
        feature columns
    :type train_gdf: cudf.DataFrame
    :param cat_cols: array of categorical column names in train_gdf
    :type cat_cols: array
    :param cont_cols: array of continuous column names in train_gdf
    :type cont_cols: array
    :param label_col: column name of the label column in train_gdf
    :type label_col: str
    :param batch_size: train_gdf will be partitioned into multiple
        dataframes of this size
    :type batch_size: int
    :param epochs: number of epochs, to be adjusted depending on
        convergence for a specific dataset
    :type epochs: int
    :param lr: learning rate
    :type lr: float
    :param wd: weight decay
    :type wd: float

    Examples
    --------
    >>> from clx.analytics.asset_classification import AssetClassification
    >>> ac = AssetClassification()
    >>> cat_cols = ["1", "2", "3", "4", "5", "6", "7", "8", "9"]
    >>> cont_cols = ["10"]
    >>> ac.train_model(X_train, cat_cols, cont_cols, "label", batch_size,
    ...                epochs, lr=0.01, wd=0.0)
    """
    self._cat_cols = cat_cols
    self._cont_cols = cont_cols

    # Train/validation split
    X, val_X, Y, val_Y = train_test_split(train_gdf, label_col,
                                          train_size=0.9)
    val_X.index = val_Y.index
    X.index = Y.index

    embedded_cols = {}
    for col in cat_cols:
        if col != label_col:
            categories_cnt = X[col].max() + 2
            if categories_cnt > 1:
                embedded_cols[col] = categories_cnt
    X[label_col] = Y
    val_X[label_col] = val_Y

    # Embedding sizes: one (cardinality, dimension) pair per categorical column
    embedding_sizes = [(n_categories, min(100, (n_categories + 1) // 2))
                       for _, n_categories in embedded_cols.items()]

    n_cont = len(cont_cols)
    out_sz = train_gdf[label_col].nunique()

    # Partition dataframes into batches
    train_part_dfs = self._get_partitioned_dfs(X, batch_size)
    val_part_dfs = self._get_partitioned_dfs(val_X, batch_size)

    self._model = TabularModel(embedding_sizes, n_cont, out_sz, self._layers,
                               self._drops, self._emb_drop, self._is_reg,
                               self._is_multi, self._use_bn)
    self._to_device(self._model, self._device)
    self._config_optimizer()
    for i in range(epochs):
        loss = self._train(self._model, self._optimizer, train_part_dfs,
                           cat_cols, cont_cols, label_col)
        print("training loss: ", loss)
        self._val_loss(self._model, val_part_dfs, cat_cols, cont_cols,
                       label_col)
def classification_dataset(request):
    X, y = make_classification(n_samples=10, n_features=5, random_state=0)
    return train_test_split(X, y, random_state=0)
def test_split_invalid_proportion(train_size):
    X = cudf.DataFrame({'x': range(10)})
    y = cudf.Series([0] * 10)

    with pytest.raises(ValueError):
        train_test_split(X, y, train_size=train_size)
def test_split_size_mismatch():
    X = cudf.DataFrame({'x': range(3)})
    y = cudf.Series([0, 1])

    with pytest.raises(ValueError):
        train_test_split(X, y)
def test_qn(loss, dtype, penalty, l1_strength, l2_strength, fit_intercept):
    if penalty == "none" and (l1_strength > 0 or l2_strength > 0):
        pytest.skip("`none` penalty does not take l1/l2_strength")

    tol = 1e-6

    qn = cuQN(loss=loss, fit_intercept=fit_intercept,
              l1_strength=l1_strength, l2_strength=l2_strength,
              tol=1e-8, output_type="cupy")

    if loss == 'softmax':
        X, y = make_classification(n_samples=5000,
                                   n_informative=10,
                                   n_features=20,
                                   n_classes=4,
                                   dtype=dtype)

        stratify = y.astype(dtype)
        X_train, X_test, y_train, y_test = train_test_split(
            X.astype(dtype), y.astype(dtype), stratify=stratify)
        most_class = cp.unique(y)[cp.argmax(cp.bincount(y))]

        baseline_preds = cp.array([most_class] * y_test.shape[0],
                                  dtype=dtype)
        baseline_score = accuracy_score(y_test, baseline_preds)

        y_pred = qn.fit(X_train, y_train).predict(X_test)
        cuml_score = accuracy_score(y_test, y_pred)

        assert cuml_score > baseline_score
        assert cuml_score >= 0.50

    elif loss == 'sigmoid':
        X = np.array(precomputed_X, dtype=dtype)
        y = np.array(precomputed_y_log, dtype=dtype)

        qn.fit(X, y)

        print(qn.objective)
        print(qn.coef_)

        if penalty == 'none' and l1_strength == 0.0 and l2_strength == 0.0:
            if fit_intercept:
                assert (qn.objective - 0.40263831615448) < tol
                cp.testing.assert_array_almost_equal(
                    qn.coef_,
                    np.array([[-2.1088872], [2.4812558]]),
                    decimal=3)
            else:
                assert (qn.objective - 0.4317452311515808) < tol
                cp.testing.assert_array_almost_equal(
                    qn.coef_,
                    np.array([[-2.120777], [3.056865]]),
                    decimal=3)
        elif penalty == 'l1' and l2_strength == 0.0:
            if fit_intercept:
                if l1_strength == 0.0:
                    assert (qn.objective - 0.40263831615448) < tol
                    cp.testing.assert_array_almost_equal(
                        qn.coef_,
                        np.array([[-2.1088872], [2.4812558]]),
                        decimal=3)
                else:
                    assert (qn.objective - 0.44295936822891235) < tol
                    cp.testing.assert_array_almost_equal(
                        qn.coef_,
                        np.array([[-1.6899368], [1.9021575]]),
                        decimal=3)
            else:
                if l1_strength == 0.0:
                    assert (qn.objective - 0.4317452311515808) < tol
                    cp.testing.assert_array_almost_equal(
                        qn.coef_,
                        np.array([[-2.120777], [3.056865]]),
                        decimal=3)
                else:
                    assert (qn.objective - 0.4769895672798157) < tol
                    cp.testing.assert_array_almost_equal(
                        qn.coef_,
                        np.array([[-1.6214856], [2.3650239]]),
                        decimal=3)
        elif penalty == 'l2' and l1_strength == 0.0:
            if fit_intercept:
                if l2_strength == 0.0:
                    assert (qn.objective - 0.40263831615448) < tol
                    cp.testing.assert_array_almost_equal(
                        qn.coef_,
                        np.array([[-2.1088872], [2.4812558]]),
                        decimal=3)
                else:
                    assert (qn.objective - 0.43780848383903503) < tol
                    cp.testing.assert_array_almost_equal(
                        qn.coef_,
                        np.array([[-1.5337948], [1.678699]]),
                        decimal=3)
            else:
                if l2_strength == 0.0:
                    assert (qn.objective - 0.4317452311515808) < tol
                    cp.testing.assert_array_almost_equal(
                        qn.coef_,
                        np.array([[-2.120777], [3.056865]]),
                        decimal=3)
                else:
                    assert (qn.objective - 0.4750209450721741) < tol
                    cp.testing.assert_array_almost_equal(
                        qn.coef_,
                        np.array([[-1.3931049], [2.0140104]]),
                        decimal=3)

        if penalty == 'elasticnet':
            if fit_intercept:
                if l1_strength == 0.0 and l2_strength == 0.0:
                    assert (qn.objective - 0.40263831615448) < tol
                    cp.testing.assert_array_almost_equal(
                        qn.coef_,
                        np.array([[-2.1088872], [2.4812558]]),
                        decimal=3)
                elif l1_strength == 0.0:
                    assert (qn.objective - 0.43780848383903503) < tol
                    cp.testing.assert_array_almost_equal(
                        qn.coef_,
                        np.array([[-1.5337948], [1.678699]]),
                        decimal=3)
                elif l2_strength == 0.0:
                    assert (qn.objective - 0.44295936822891235) < tol
                    cp.testing.assert_array_almost_equal(
                        qn.coef_,
                        np.array([[-1.6899368], [1.9021575]]),
                        decimal=3)
                else:
                    assert (qn.objective - 0.467987984418869) < tol
                    cp.testing.assert_array_almost_equal(
                        qn.coef_,
                        np.array([[-1.3727235], [1.4639963]]),
                        decimal=3)
            else:
                if l1_strength == 0.0 and l2_strength == 0.0:
                    assert (qn.objective - 0.4317452311515808) < tol
                    cp.testing.assert_array_almost_equal(
                        qn.coef_,
                        np.array([[-2.120777], [3.056865]]),
                        decimal=3)
                elif l1_strength == 0.0:
                    assert (qn.objective - 0.4750209450721741) < tol
                    cp.testing.assert_array_almost_equal(
                        qn.coef_,
                        np.array([[-1.3931049], [2.0140104]]),
                        decimal=3)
                elif l2_strength == 0.0:
                    assert (qn.objective - 0.4769895672798157) < tol
                    cp.testing.assert_array_almost_equal(
                        qn.coef_,
                        np.array([[-1.6214856], [2.3650239]]),
                        decimal=3)
                else:
                    assert (qn.objective - 0.5067970156669617) < tol
                    cp.testing.assert_array_almost_equal(
                        qn.coef_,
                        np.array([[-1.2102532], [1.752459]]),
                        decimal=3)