def test_learn_model_type():
    """Check that a learner built from ``RFC.yaml`` reports ``Classification``.

    The learner is trained on the pima-diabetes frame before its
    ``model_type`` attribute is inspected.
    """
    source = Inputers(
        description_filepath="../../descriptions/pre/inputers/pima-diabetes.yaml"
    )
    frame = source.transform()

    # Separate the target column from every other column.
    target_values = frame[source.target].values
    feature_columns = frame.columns.difference([source.target])
    features = frame[feature_columns]

    model = Learners(description_filepath="../../descriptions/learners/RFC.yaml")
    model.train(features, target_values, checkpoint="RandomForest.ckp")

    assert model.model_type == "Classification"
def test_imputer_feature_boston_err():
    """Expect ``PasoError`` when imputing the boston frame with an all-NaN column.

    A column named ``ggoo`` filled entirely with NaN is added to the training
    frame before the most-frequent imputer is applied to it.
    """
    inputer = Inputers(
        description_filepath="../../descriptions/pre/inputers/boston.yaml"
    )
    train = inputer.transform(dataset="train")
    train["ggoo"] = np.nan

    imputer = Imputers(
        description_filepath="../../descriptions/pre/cleaners/most_frequent_impute.yaml"
    )

    # Only the call sits under ``pytest.raises``: a shape comparison on its
    # result could never be evaluated once the expected exception is raised.
    with pytest.raises(PasoError):
        imputer.transform(train, inplace=True, verbose=True)
def test_evaluate_pima():
    """Check that ``evaluate`` on the trained LGBC learner yields nine keys."""
    source = Inputers(
        description_filepath="../../descriptions/pre/inputers/pima-diabetes.yaml"
    )
    frame = source.transform()

    target_values = frame[source.target].values
    feature_columns = frame.columns.difference([source.target])
    features = frame[feature_columns]

    model = Learners(description_filepath="../../descriptions/learners/LGBC.yaml")
    model.train(features, target_values, checkpoint="pima_LGBMC.ckp")

    metrics = model.evaluate(features, target_values)
    assert len(metrics.keys()) == 9
def test_booL_some_not_true(City):
    """Check ``boolean_to_integer`` on columns mixing ``True`` and ``False``.

    ``Mcg`` is set to all ``True`` except row 0, ``Alm`` to all ``False``
    except row 0. After conversion, row 0 of ``Alm`` must equal 1 and row 0
    of ``Mcg`` must equal 0. The ``City`` fixture is requested but not used
    in the body.
    """
    source = Inputers(
        description_filepath="../../descriptions/pre/inputers/yeast3.yaml"
    )
    frame = source.transform(dataset="train")

    frame["Mcg"] = True
    frame.loc[0, "Mcg"] = False
    frame["Alm"] = False
    frame.loc[0, "Alm"] = True

    converted = boolean_to_integer(frame, inplace=True)

    # ``Alm`` is read from the returned frame, ``Mcg`` from the frame that
    # was passed in with ``inplace=True``.
    assert converted.loc[0, "Alm"] == 1
    assert frame.loc[0, "Mcg"] == 0
def test_imputer_features_nans_found():
    """Check that NaNs remain when imputation is restricted to ``Alm``.

    An all-NaN column named ``bad`` is added to the yeast3 training frame and
    ``paso_impute`` is asked to handle only the ``Alm`` feature; the result
    must still contain at least one null value.
    """
    inputer = Inputers(
        description_filepath="../../descriptions/pre/inputers/yeast3.yaml"
    )
    train = inputer.transform(dataset="train")

    # ``np.nan`` is the canonical spelling; the ``np.NaN`` alias was removed
    # in NumPy 2.0.
    train["bad"] = np.nan

    imputer = Imputers(
        description_filepath="../../descriptions/pre/cleaners/most_frequent_impute.yaml"
    )
    imputed = train.paso_impute(
        imputer, features=["Alm"], inplace=True, verbose=True
    )

    assert imputed.isnull().any().any()
def test_imputer_features_not_found():
    """Expect ``PasoError`` when the imputer gets the features ``bad``/``badder``/``Alm``."""
    source = Inputers(
        description_filepath="../../descriptions/pre/inputers/yeast3.yaml"
    )
    frame = source.transform(dataset="train")
    imputer = Imputers(
        description_filepath="../../descriptions/pre/cleaners/most_frequent_impute.yaml"
    )
    requested_features = ["bad", "badder", "Alm"]

    with pytest.raises(PasoError):
        imputer.transform(
            frame, features=requested_features, inplace=True, verbose=True
        )
def test_spltter_transform_creditcard_20__url_cvs_zip():
    """Check the shapes produced by ``split_20_stratify`` on the creditcard data.

    Only the two feature partitions are checked; the two target partitions
    returned by the splitter are discarded.
    """
    source = Inputers(
        description_filepath="../../descriptions/pre/inputers/creditcard.yaml"
    )
    frame = source.transform(dataset="train")

    target_values = frame[source.target].values
    feature_columns = frame.columns.difference([source.target])
    features = frame[feature_columns]

    splitter = Splitters(
        description_filepath="../../descriptions/pre/inputers/split_20_stratify.yaml"
    )
    train_part, valid_part, _, _ = splitter.transform(features, target_values)

    assert train_part.shape == (227845, 30)
    assert valid_part.shape == (56962, 30)
def test_imputer_features_allnans():
    """Expect ``PasoError`` when imputing an ``Alm`` column that is almost all NaN.

    Every ``Alm`` value is set to NaN and then only the entry at index label
    0 is set to 0 before the most-frequent imputer is applied to ``Alm``.
    """
    inputer = Inputers(
        description_filepath="../../descriptions/pre/inputers/yeast3.yaml"
    )
    train = inputer.transform(dataset="train")

    # ``np.nan`` is the canonical spelling; the ``np.NaN`` alias was removed
    # in NumPy 2.0.
    train["Alm"] = np.nan
    train.loc[0, "Alm"] = 0

    imputer = Imputers(
        description_filepath="../../descriptions/pre/cleaners/most_frequent_impute.yaml"
    )

    # Only the call sits under ``pytest.raises``: a comparison on its result
    # could never be evaluated once the expected exception is raised.
    with pytest.raises(PasoError):
        imputer.transform(train, features=["Alm"], inplace=True, verbose=True)
def test_predict_Prob_error():
    """Expect ``PasoError`` from ``predict_proba`` on a learner that was not trained."""
    inputer = Inputers(
        description_filepath="../../descriptions/pre/inputers/pima-diabetes.yaml"
    )
    diabetes = inputer.transform()

    # ``train`` is deliberately never called on this learner; the test
    # expects ``predict_proba`` to raise in that state.
    learner = Learners(description_filepath="../../descriptions/learners/LGBC.yaml")

    X_train = diabetes[diabetes.columns.difference([inputer.target])]

    # Only the call sits under ``pytest.raises``: a shape comparison on its
    # result could never be evaluated once the expected exception is raised.
    with pytest.raises(PasoError):
        learner.predict_proba(X_train)
def test_splitter_transform__onto_group():
    """Check all four partition shapes from ``split_20_stratify`` on the iris data."""
    source = Inputers(
        description_filepath="../../descriptions/pre/inputers/iris.yaml"
    )
    frame = source.transform()

    target_values = frame[source.target].values
    feature_columns = frame.columns.difference([source.target])
    features = frame[feature_columns]

    splitter = Splitters(
        description_filepath="../../descriptions/pre/inputers/split_20_stratify.yaml"
    )
    train_part, valid_part, y_train, y_valid = splitter.transform(
        features, target_values
    )

    assert train_part.shape == (120, 4)
    assert valid_part.shape == (30, 4)
    assert y_train.shape == (120,)
    assert y_valid.shape == (30,)
def test_spitter_transform_s_wine():
    """Check all four partition shapes from ``split_30_stratify`` on the wine data."""
    source = Inputers(
        description_filepath="../../descriptions/pre/inputers/wine.yaml"
    )
    frame = source.transform()

    target_values = frame[source.target].values
    feature_columns = frame.columns.difference([source.target])
    features = frame[feature_columns]

    splitter = Splitters(
        description_filepath="../../descriptions/pre/inputers/split_30_stratify.yaml"
    )
    train_part, valid_part, y_train, y_valid = splitter.transform(
        features, target_values
    )

    assert train_part.shape == (124, 13)
    assert valid_part.shape == (54, 13)
    assert y_train.shape == (124,)
    assert y_valid.shape == (54,)
def test_spltter_transform_creditcard_url_30__cvs_zip():
    """Check all four partition shapes from ``split_20_stratify`` on the yeast3 data.

    NOTE(review): the test name mentions creditcard and 30, but the body
    loads ``yeast3.yaml`` and ``split_20_stratify.yaml`` -- confirm which
    was intended.
    """
    source = Inputers(
        description_filepath="../../descriptions/pre/inputers/yeast3.yaml"
    )
    frame = source.transform(dataset="train")

    target_values = frame[source.target].values
    feature_columns = frame.columns.difference([source.target])
    features = frame[feature_columns]

    splitter = Splitters(
        description_filepath="../../descriptions/pre/inputers/split_20_stratify.yaml"
    )
    train_part, valid_part, y_train, y_valid = splitter.transform(
        features, target_values
    )

    assert train_part.shape == (1187, 8)
    assert valid_part.shape == (297, 8)
    assert y_train.shape == (1187,)
    assert y_valid.shape == (297,)
def test_learner_cross_validate_RFC_iris_milticlass_evaluate_test_accuracy():
    """Check that cross-validated mean ``test_accuracy`` on iris is at least 0.95.

    NOTE(review): the checkpoint is named ``pima_LGBMC.ckp`` although the
    data is iris and the learner comes from ``RFC.yaml`` -- confirm the
    filename is intended.
    """
    source = Inputers(
        description_filepath="../../descriptions/pre/inputers/iris.yaml"
    )
    frame = source.transform()

    target_values = frame[source.target].values
    feature_columns = frame.columns.difference([source.target])
    features = frame[feature_columns]

    model = Learners(description_filepath="../../descriptions/learners/RFC.yaml")
    model.train(features, target_values, checkpoint="pima_LGBMC.ckp")

    cv_description = (
        "../../descriptions/learners/Cross_validation_classification.yaml"
    )
    scores = model.cross_validate(
        features, target_values, cv_description_filepath=cv_description
    )

    mean_scores = scores["mean"]
    assert mean_scores["test_accuracy"] >= 0.95
def test_learner_cross_validate_RFC_iris_multiclass_evaluate_AO():
    """Check that ``evaluate`` reports accuracy 1.0 on iris after cross-validation.

    The return value of ``cross_validate`` is discarded; only the
    ``accuracy`` entry of the subsequent ``evaluate`` result is asserted.
    """
    source = Inputers(
        description_filepath="../../descriptions/pre/inputers/iris.yaml"
    )
    frame = source.transform()

    target_values = frame[source.target].values
    feature_columns = frame.columns.difference([source.target])
    features = frame[feature_columns]

    model = Learners(description_filepath="../../descriptions/learners/RFC.yaml")
    model.train(features, target_values, checkpoint="iris)RC.ckp")

    cv_description = (
        "../../descriptions/learners/Cross_validation_classification.yaml"
    )
    model.cross_validate(
        features, target_values, cv_description_filepath=cv_description
    )

    metrics = model.evaluate(features, target_values)
    assert metrics["accuracy"] == 1.0
def test_inputer_transform_cvs_url():
    """Compare the wine inputer's column labels with a CSV read from the UCI URL.

    NOTE(review): the check uses ``.any()``, so a single matching column
    label is enough to pass -- confirm ``.all()`` was not intended.
    """
    url = "https://archive.ics.uci.edu/ml/machine-learning-databases/wine/wine.data"
    column_names = [
        "Class",
        "Alcohol",
        "Malic-acid",
        "Ash",
        "Alcalinity-ash",
        "Magnesium",
        "phenols",
        "Flavanoids",
        "Nonflavanoid-phenols",
        "Proanthocyanins",
        "Color-intensity",
        "Hue",
        "OD280-OD315-diluted-wines",
        "Proline",
    ]
    # Only the first rows are kept; just the column labels are compared.
    reference = pd.read_csv(url, names=column_names).head()

    source = Inputers(
        description_filepath="../../descriptions/pre/inputers/wine.yaml"
    )
    loaded_columns = source.transform().columns

    assert (loaded_columns == reference.columns).any()
def test_inputer_transform_exec(flower):
    """Check that the iris inputer output matches the ``flower`` fixture in at least one cell."""
    source = Inputers(
        description_filepath="../../descriptions/pre/inputers/iris.yaml"
    )
    loaded = source.transform()

    cell_matches = loaded == flower
    assert cell_matches.any().any()
def test_inputer_train_bad_file(flower):
    """Expect ``PasoError`` from ``transform`` on an inputer built from ``bad.yaml``."""
    source = Inputers(
        description_filepath="../../descriptions/pre/inputers/bad.yaml"
    )

    with pytest.raises(PasoError):
        source.transform()
def test_inputer_transform_ontological_bad_description_filepath(flower):
    """Expect ``PasoError`` from ``transform`` when the description path is ``XXXX.yaml``."""
    source = Inputers(
        description_filepath="../../descriptions/inputers/XXXX.yaml"
    )

    with pytest.raises(PasoError):
        source.transform()
def test_inputer_bad_kind():
    """Expect ``PasoError`` from ``transform`` on the ``otto_group_bad4.yaml`` description."""
    inputer = Inputers(
        description_filepath="../../descriptions/pre/inputers/otto_group_bad4.yaml"
    )

    # Only the call sits under ``pytest.raises``: a comparison on its result
    # could never be evaluated once the expected exception is raised.
    with pytest.raises(PasoError):
        inputer.transform()
def test_inputer_create_data():
    """Check that the ``create-data`` description yields a (1000, 3) training frame."""
    source = Inputers(
        description_filepath="../../descriptions/pre/inputers/create-data.yaml"
    )
    frame = source.transform(dataset="train")

    expected_shape = (1000, 3)
    assert frame.shape == expected_shape
def test_inputer_pima():
    """Check that the pima-diabetes description yields a (768, 9) training frame."""
    source = Inputers(
        description_filepath="../../descriptions/pre/inputers/pima-diabetes.yaml"
    )
    frame = source.transform(dataset="train")

    expected_shape = (768, 9)
    assert frame.shape == expected_shape
def test_inputer_bad_keyword(flower):
    """Expect ``PasoError`` from ``transform`` on an inputer built with a misspelled keyword."""
    # ``description_filepatsh`` is presumably misspelled on purpose (the test
    # name says "bad keyword") -- do not correct it.
    inputer = Inputers(
        description_filepatsh="../../descriptions/pre/inputers/iris.yaml"
    )

    # Only the call sits under ``pytest.raises``: a comparison on its result
    # could never be evaluated once the expected exception is raised.
    with pytest.raises(PasoError):
        inputer.transform()
def test_inputer_yeast3_cvs_zip():
    """Check that the yeast3 description yields a (1484, 9) training frame."""
    source = Inputers(
        description_filepath="../../descriptions/pre/inputers/yeast3.yaml"
    )
    frame = source.transform(dataset="train")

    expected_shape = (1484, 9)
    assert frame.shape == expected_shape
def test_inputer_creditcard_url_cvs_zip():
    """Check that the creditcard description yields a (284807, 31) training frame."""
    source = Inputers(
        description_filepath="../../descriptions/pre/inputers/creditcard.yaml"
    )
    frame = source.transform(dataset="train")

    expected_shape = (284807, 31)
    assert frame.shape == expected_shape
def test_inputer_transform_flower(flower):
    """Check that the iris inputer output has the same shape as the ``flower`` fixture."""
    source = Inputers(
        description_filepath="../../descriptions/pre/inputers/iris.yaml"
    )
    loaded = source.transform()

    assert loaded.shape == flower.shape
def test_inputer_transform_dataset_setting_bad(flower):
    """Expect ``PasoError`` when the iris inputer is asked for ``dataset="test"``."""
    inputer = Inputers(
        description_filepath="../../descriptions/pre/inputers/iris.yaml"
    )

    # Only the call sits under ``pytest.raises``: a comparison on its result
    # could never be evaluated once the expected exception is raised.
    with pytest.raises(PasoError):
        inputer.transform(dataset="test")
def test_inputer_transform_wine():
    """Check that the wine description yields a (178, 14) frame."""
    source = Inputers(
        description_filepath="../../descriptions/pre/inputers/wine.yaml"
    )
    frame = source.transform()

    expected_shape = (178, 14)
    assert frame.shape == expected_shape
def test_inputer_otto_groupsample_Submission():
    """Check that the otto_group ``sampleSubmission`` dataset has shape (144368, 10)."""
    source = Inputers(
        description_filepath="../../descriptions/pre/inputers/otto_group.yaml"
    )
    submission = source.transform(dataset="sampleSubmission")

    expected_shape = (144368, 10)
    assert submission.shape == expected_shape
def test_inputer_otto_group_test():
    """Check that the otto_group ``test`` dataset has shape (144368, 94)."""
    source = Inputers(
        description_filepath="../../descriptions/pre/inputers/otto_group.yaml"
    )
    frame = source.transform(dataset="test")

    expected_shape = (144368, 94)
    assert frame.shape == expected_shape
def test_inputer_transform_otto_group():
    """Check that the otto_group description yields a (61878, 95) frame by default."""
    source = Inputers(
        description_filepath="../../descriptions/pre/inputers/otto_group.yaml"
    )
    frame = source.transform()

    expected_shape = (61878, 95)
    assert frame.shape == expected_shape