def test_predict_proba2():
    """Assert that the TPOT predict_proba function returns a numpy matrix filled with probabilities (float).

    A fixed DecisionTreeClassifier pipeline is compiled and fitted directly on the
    training data, then every cell of the predict_proba output is passed through
    float_range. Any exception raised by float_range is reported as a test failure.
    """
    tpot_obj = TPOTClassifier()
    pipeline_string = (
        'DecisionTreeClassifier(input_matrix, DecisionTreeClassifier__criterion=gini'
        ', DecisionTreeClassifier__max_depth=8,DecisionTreeClassifier__min_samples_leaf=5,'
        'DecisionTreeClassifier__min_samples_split=5)'
    )
    tpot_obj._optimized_pipeline = creator.Individual.from_string(pipeline_string, tpot_obj._pset)
    tpot_obj._fitted_pipeline = tpot_obj._toolbox.compile(expr=tpot_obj._optimized_pipeline)
    tpot_obj._fitted_pipeline.fit(training_features, training_classes)

    result = tpot_obj.predict_proba(testing_features)
    rows = result.shape[0]
    columns = result.shape[1]

    for i in range(rows):
        for j in range(columns):
            value = result[i][j]
            try:
                float_range(value)
            except Exception as err:
                # Raise explicitly instead of `assert False`: the failure then
                # survives `python -O` and names the offending cell and cause.
                raise AssertionError(
                    'predict_proba output [%d][%d] = %r was rejected by float_range: %s'
                    % (i, j, value, err)
                )
def test_imputer_in_export():
    """Assert that TPOT exports a pipeline with an imputation step if imputation was used in fit().

    The test writes a NaN into a copy of the training features, fits a small
    TPOTClassifier on it, swaps in a fixed KNeighborsClassifier pipeline and
    compares the code returned by export_pipeline with a hard-coded expectation.
    """
    tpot_obj = TPOTClassifier(
        random_state=42,
        population_size=1,
        offspring_size=2,
        generations=1,
        verbosity=0,
        config_dict='TPOT light'
    )
    # Work on a copy so the shared training_features array is not modified.
    features_with_nan = np.copy(training_features)
    features_with_nan[0][0] = float('nan')

    # presumably fit() is what sets tpot_obj._imputed for NaN input -- TODO confirm
    tpot_obj.fit(features_with_nan, training_target)
    # use fixed pipeline since the random.seed() performs differently in python 2.* and 3.*
    pipeline_string = (
        'KNeighborsClassifier('
        'input_matrix, '
        'KNeighborsClassifier__n_neighbors=10, '
        'KNeighborsClassifier__p=1, '
        'KNeighborsClassifier__weights=uniform'
        ')'
    )
    tpot_obj._optimized_pipeline = creator.Individual.from_string(pipeline_string, tpot_obj._pset)

    # The imputation flag recorded on the estimator is forwarded to the exporter.
    export_code = export_pipeline(tpot_obj._optimized_pipeline, tpot_obj.operators, tpot_obj._pset, tpot_obj._imputed)

    # NOTE(review): the literal below is kept exactly as it appears in this file, on a
    # single line. The exported code is presumably multi-line -- confirm the expected
    # line breaks and indentation against the real export_pipeline output.
    expected_code = """import numpy as np import pandas as pd from sklearn.model_selection import train_test_split from sklearn.neighbors import KNeighborsClassifier try: from sklearn.impute import SimpleImputer as Imputer except ImportError: from sklearn.preprocessing import Imputer # NOTE: Make sure that the class is labeled 'target' in the data file tpot_data = pd.read_csv('PATH/TO/DATA/FILE', sep='COLUMN_SEPARATOR', dtype=np.float64) features = tpot_data.drop('target', axis=1).values training_features, testing_features, training_target, testing_target = \\ train_test_split(features, tpot_data['target'].values, random_state=None) imputer = Imputer(strategy="median") imputer.fit(training_features) training_features = imputer.transform(training_features) testing_features = imputer.transform(testing_features) exported_pipeline = KNeighborsClassifier(n_neighbors=10, p=1, weights="uniform") exported_pipeline.fit(training_features, training_target) results = exported_pipeline.predict(testing_features) """

    assert_equal(export_code, expected_code)
def test_predict_2():
    """Check that predict() yields one prediction per testing row, i.e. shape (num_testing_rows,)."""
    classifier = TPOTClassifier()

    # Install a fixed single-step pipeline instead of running the optimizer.
    individual = creator.Individual.from_string(
        'DecisionTreeClassifier(input_matrix)',
        classifier._pset
    )
    classifier._optimized_pipeline = individual

    fitted = classifier._toolbox.compile(expr=classifier._optimized_pipeline)
    classifier._fitted_pipeline = fitted
    classifier._fitted_pipeline.fit(training_features, training_classes)

    predictions = classifier.predict(testing_features)
    expected_shape = (testing_features.shape[0],)

    assert predictions.shape == expected_shape
def test_predict_2():
    """Verify that TPOT's predict output has shape (num_testing_rows,) for a fixed decision-tree pipeline."""
    pipeline_text = 'DecisionTreeClassifier(input_matrix)'

    tpot = TPOTClassifier()
    tpot._optimized_pipeline = creator.Individual.from_string(pipeline_text, tpot._pset)

    # Compile the hand-written individual and train it on the shared fixtures.
    compiled = tpot._toolbox.compile(expr=tpot._optimized_pipeline)
    tpot._fitted_pipeline = compiled
    tpot._fitted_pipeline.fit(training_features, training_classes)

    output = tpot.predict(testing_features)
    n_testing_rows = testing_features.shape[0]
    assert output.shape == (n_testing_rows,)
def test_imputer_in_export():
    """Assert that TPOT exports a pipeline with an imputation step if imputation was used in fit().

    A NaN is placed in a copy of the training features before fitting; afterwards
    a fixed KNeighborsClassifier pipeline is exported and the generated code is
    compared against a hard-coded string.
    """
    tpot_obj = TPOTClassifier(
        random_state=42,
        population_size=1,
        offspring_size=2,
        generations=1,
        verbosity=0,
        config_dict='TPOT light'
    )
    # Copy first so the NaN does not leak into the shared training_features array.
    features_with_nan = np.copy(training_features)
    features_with_nan[0][0] = float('nan')

    # presumably fit() records on tpot_obj._imputed that imputation happened -- TODO confirm
    tpot_obj.fit(features_with_nan, training_target)
    # use fixed pipeline since the random.seed() performs differently in python 2.* and 3.*
    pipeline_string = (
        'KNeighborsClassifier('
        'input_matrix, '
        'KNeighborsClassifier__n_neighbors=10, '
        'KNeighborsClassifier__p=1, '
        'KNeighborsClassifier__weights=uniform'
        ')'
    )
    tpot_obj._optimized_pipeline = creator.Individual.from_string(pipeline_string, tpot_obj._pset)

    # tpot_obj._imputed is passed through so the exporter can emit the imputer step.
    export_code = export_pipeline(tpot_obj._optimized_pipeline, tpot_obj.operators, tpot_obj._pset, tpot_obj._imputed)

    # NOTE(review): the literal below is kept exactly as it appears in this file, on a
    # single line. The exported code is presumably multi-line -- confirm the expected
    # line breaks and indentation against the real export_pipeline output.
    expected_code = """import numpy as np import pandas as pd from sklearn.model_selection import train_test_split from sklearn.neighbors import KNeighborsClassifier from sklearn.preprocessing import Imputer # NOTE: Make sure that the class is labeled 'target' in the data file tpot_data = pd.read_csv('PATH/TO/DATA/FILE', sep='COLUMN_SEPARATOR', dtype=np.float64) features = tpot_data.drop('target', axis=1).values training_features, testing_features, training_target, testing_target = \\ train_test_split(features, tpot_data['target'].values, random_state=None) imputer = Imputer(strategy="median") imputer.fit(training_features) training_features = imputer.transform(training_features) testing_features = imputer.transform(testing_features) exported_pipeline = KNeighborsClassifier(n_neighbors=10, p=1, weights="uniform") exported_pipeline.fit(training_features, training_target) results = exported_pipeline.predict(testing_features) """

    assert_equal(export_code, expected_code)
def test_predict_2():
    """Check that predict() on a fixed, fully parameterised decision tree returns shape (num_testing_rows,)."""
    # Explicit hyper-parameters keep the pipeline independent of operator defaults.
    tree_pipeline = (
        'DecisionTreeClassifier(input_matrix, DecisionTreeClassifier__criterion=gini'
        ', DecisionTreeClassifier__max_depth=8,DecisionTreeClassifier__min_samples_leaf=5,'
        'DecisionTreeClassifier__min_samples_split=5)'
    )

    estimator = TPOTClassifier()
    estimator._optimized_pipeline = creator.Individual.from_string(tree_pipeline, estimator._pset)
    estimator._fitted_pipeline = estimator._toolbox.compile(
        expr=estimator._optimized_pipeline
    )
    estimator._fitted_pipeline.fit(training_features, training_classes)

    predicted = estimator.predict(testing_features)
    wanted_shape = (testing_features.shape[0],)
    assert predicted.shape == wanted_shape
def test_export():
    """Assert that TPOT's export function throws a RuntimeError when no optimized pipeline exists.

    Once an optimized pipeline has been assigned, the test also checks that
    export() creates the requested file. The file is removed afterwards even
    when the export or the assertion fails.
    """
    output_file = "test_export.py"
    tpot_obj = TPOTClassifier()
    assert_raises(RuntimeError, tpot_obj.export, output_file)

    # Well-formed expression: every argument is comma-separated and the outer
    # KNeighborsClassifier call is closed.
    pipeline_string = (
        'KNeighborsClassifier(CombineDFs('
        'DecisionTreeClassifier(input_matrix, DecisionTreeClassifier__criterion=gini, '
        'DecisionTreeClassifier__max_depth=8,DecisionTreeClassifier__min_samples_leaf=5,'
        'DecisionTreeClassifier__min_samples_split=5), ZeroCount(input_matrix)), '
        'KNeighborsClassifier__n_neighbors=10, '
        'KNeighborsClassifier__p=1,KNeighborsClassifier__weights=uniform)'
    )
    pipeline = creator.Individual.from_string(pipeline_string, tpot_obj._pset)
    tpot_obj._optimized_pipeline = pipeline

    try:
        tpot_obj.export(output_file)
        assert path.isfile(output_file)
    finally:
        # clean up exported file; guarded so a failed export is not masked by
        # an error from removing a file that was never written
        if path.isfile(output_file):
            remove(output_file)
def test_export():
    """Assert that TPOT's export function throws a RuntimeError when no optimized pipeline exists.

    After the failure case, a fixed pipeline is installed as the optimized
    pipeline and export() is expected to write the target file. Cleanup of that
    file runs whether or not the success-path checks pass.
    """
    export_path = "test_export.py"

    tpot_obj = TPOTClassifier()
    # No optimized pipeline yet, so exporting must fail.
    assert_raises(RuntimeError, tpot_obj.export, export_path)

    # Arguments are comma-separated throughout and the outermost call is closed,
    # so the string reads as one complete pipeline expression.
    pipeline_string = (
        'KNeighborsClassifier(CombineDFs('
        'DecisionTreeClassifier(input_matrix, DecisionTreeClassifier__criterion=gini, '
        'DecisionTreeClassifier__max_depth=8,DecisionTreeClassifier__min_samples_leaf=5,'
        'DecisionTreeClassifier__min_samples_split=5), ZeroCount(input_matrix)), '
        'KNeighborsClassifier__n_neighbors=10, '
        'KNeighborsClassifier__p=1,KNeighborsClassifier__weights=uniform)'
    )
    tpot_obj._optimized_pipeline = creator.Individual.from_string(pipeline_string, tpot_obj._pset)

    try:
        tpot_obj.export(export_path)
        assert path.isfile(export_path)
    finally:
        # Remove the exported file only if it exists, so cleanup never hides
        # the original failure.
        if path.isfile(export_path):
            remove(export_path)
def test_score_2():
    """Assert that the TPOTClassifier score function outputs a known score for a fixed pipeline"""
    # Assumes use of the TPOT balanced_accuracy function
    expected_score = 0.977777777778

    classifier = TPOTClassifier()

    # Build the pipeline whose score is known in advance
    knn_pipeline = (
        'KNeighborsClassifier(input_matrix, KNeighborsClassifier__n_neighbors=10, '
        'KNeighborsClassifier__p=1,KNeighborsClassifier__weights=uniform)'
    )
    classifier._optimized_pipeline = creator.Individual.from_string(knn_pipeline, classifier._pset)
    classifier._fitted_pipeline = classifier._toolbox.compile(expr=classifier._optimized_pipeline)
    classifier._fitted_pipeline.fit(training_features, training_classes)

    # Ask TPOT for the score on the held-out data
    actual_score = classifier.score(testing_features, testing_classes)

    # Relative closeness check (rel_tol=1e-09, abs_tol=0.0), after
    # http://stackoverflow.com/questions/5595425/
    allowed_gap = max(1e-09 * max(abs(expected_score), abs(actual_score)), 0.0)
    assert abs(expected_score - actual_score) <= allowed_gap
def test_score_2():
    """Check that TPOTClassifier.score reproduces a known score for a fixed random-forest pipeline."""

    def _nearly_equal(left, right, rel_tol=1e-09, abs_tol=0.0):
        # http://stackoverflow.com/questions/5595425/
        largest = max(abs(left), abs(right))
        return abs(left - right) <= max(rel_tol * largest, abs_tol)

    estimator = TPOTClassifier()
    estimator._pbar = tqdm(total=1, disable=True)

    # Assumes use of the TPOT balanced_accuracy function
    reference_score = 0.986318199045

    # Install the pipeline with the known score and train it
    individual = creator.Individual.from_string('RandomForestClassifier(input_matrix)', estimator._pset)
    estimator._optimized_pipeline = individual
    estimator._fitted_pipeline = estimator._toolbox.compile(expr=estimator._optimized_pipeline)
    estimator._fitted_pipeline.fit(training_features, training_classes)

    # Score reported by TPOT on the testing split
    measured_score = estimator.score(testing_features, testing_classes)

    assert _nearly_equal(reference_score, measured_score)
def test_score_2():
    """Verify that scoring a fixed RandomForestClassifier pipeline gives the known TPOTClassifier score."""
    tpot = TPOTClassifier()
    tpot._pbar = tqdm(total=1, disable=True)

    # Assumes use of the TPOT balanced_accuracy function
    target_score = 0.986318199045

    # Reify the pipeline with the known score
    forest_pipeline = 'RandomForestClassifier(input_matrix)'
    tpot._optimized_pipeline = creator.Individual.from_string(forest_pipeline, tpot._pset)
    compiled_pipeline = tpot._toolbox.compile(expr=tpot._optimized_pipeline)
    tpot._fitted_pipeline = compiled_pipeline
    tpot._fitted_pipeline.fit(training_features, training_classes)

    # Obtain the score from TPOT
    obtained_score = tpot.score(testing_features, testing_classes)

    # Relative tolerance of 1e-09 with no absolute tolerance, as in
    # http://stackoverflow.com/questions/5595425/
    difference = abs(target_score - obtained_score)
    bound = max(1e-09 * max(abs(target_score), abs(obtained_score)), 0.0)
    assert difference <= bound
def test_predict_proba2():
    """Assert that the TPOT predict_proba function returns a numpy matrix filled with probabilities (float).

    The test fits a fixed DecisionTreeClassifier pipeline and feeds every cell
    of the predict_proba result to float_range. If float_range raises for any
    cell, the test fails with an AssertionError that carries the cause.
    """
    tpot_obj = TPOTClassifier()
    pipeline_string = (
        'DecisionTreeClassifier(input_matrix, DecisionTreeClassifier__criterion=gini'
        ', DecisionTreeClassifier__max_depth=8,DecisionTreeClassifier__min_samples_leaf=5,'
        'DecisionTreeClassifier__min_samples_split=5)'
    )
    tpot_obj._optimized_pipeline = creator.Individual.from_string(pipeline_string, tpot_obj._pset)
    tpot_obj._fitted_pipeline = tpot_obj._toolbox.compile(expr=tpot_obj._optimized_pipeline)
    tpot_obj._fitted_pipeline.fit(training_features, training_classes)

    result = tpot_obj.predict_proba(testing_features)
    rows = result.shape[0]
    columns = result.shape[1]

    try:
        for i in range(rows):
            for j in range(columns):
                float_range(result[i][j])
    except Exception as err:
        # An explicit raise keeps the check active under `python -O` (where
        # assert statements are removed) and reports why the value was rejected.
        raise AssertionError(
            'predict_proba returned a value rejected by float_range: %r' % (err,)
        )