def test_mut_operator_stats_update():
    """Check the statistics bookkeeping of ``_random_mutation_operator``.

    A fixed KNeighborsClassifier-over-BernoulliNB individual is given random
    crossover/mutation counters and registered in
    ``tpot_obj.evaluated_individuals_``. The mutation operator is then applied
    ten times in a chain; every offspring must keep its parent's crossover
    count, carry a mutation count exactly one higher, and list the parent's
    string form as its only predecessor.
    """
    tpot_obj = TPOTClassifier()
    tpot_obj._fit_init()

    parent_expr = (
        'KNeighborsClassifier('
        'BernoulliNB(input_matrix, BernoulliNB__alpha=10.0, BernoulliNB__fit_prior=False),'
        'KNeighborsClassifier__n_neighbors=10, '
        'KNeighborsClassifier__p=1, '
        'KNeighborsClassifier__weights=uniform'
        ')'
    )
    parent = creator.Individual.from_string(parent_expr, tpot_obj._pset)
    initialize_stats_dict(parent)

    # Start from arbitrary counters so the check is not tied to the defaults.
    for counter in ("crossover_count", "mutation_count"):
        parent.statistics[counter] = random.randint(0, 10)

    # The operator is exercised on an individual that is already recorded as
    # evaluated, keyed by its string form.
    tpot_obj.evaluated_individuals_[str(parent)] = tpot_obj._combine_individual_stats(
        2, 0.99, parent.statistics
    )

    for _ in range(10):
        (child,) = tpot_obj._random_mutation_operator(parent)

        # Parent statistics are read after the call, as the comparison baseline.
        parent_stats = parent.statistics
        child_stats = child.statistics

        assert child_stats['crossover_count'] == parent_stats['crossover_count']
        assert child_stats['mutation_count'] == parent_stats['mutation_count'] + 1
        assert child_stats['predecessor'] == (str(parent),)

        # The offspring becomes the parent of the next round.
        parent = child
def test_pipeline_score_save():
    """Check that ``export_pipeline`` emits the expected code for a scored pipeline.

    A SelectPercentile -> DecisionTreeClassifier individual is built from its
    string form and exported with ``pipeline_score=0.929813743``; the result is
    compared for equality against ``expected_code``.
    """
    tpot_obj = TPOTClassifier()
    tpot_obj._fit_init()
    tpot_obj._pbar = tqdm(total=1, disable=True)

    individual_expr = (
        'DecisionTreeClassifier(SelectPercentile(input_matrix, SelectPercentile__percentile=20),'
        'DecisionTreeClassifier__criterion=gini, DecisionTreeClassifier__max_depth=8,'
        'DecisionTreeClassifier__min_samples_leaf=5, DecisionTreeClassifier__min_samples_split=5)'
    )
    individual = creator.Individual.from_string(individual_expr, tpot_obj._pset)

    # NOTE(review): the literal below is reproduced exactly as it appears in
    # this file, i.e. on a single line. The exported code is presumably
    # multi-line, so the line breaks and indentation inside this literal may
    # have been lost -- confirm against the actual export_pipeline output.
    expected_code = """import numpy as np import pandas as pd from sklearn.feature_selection import SelectPercentile, f_classif from sklearn.model_selection import train_test_split from sklearn.pipeline import make_pipeline from sklearn.tree import DecisionTreeClassifier # NOTE: Make sure that the class is labeled 'target' in the data file tpot_data = pd.read_csv('PATH/TO/DATA/FILE', sep='COLUMN_SEPARATOR', dtype=np.float64) features = tpot_data.drop('target', axis=1).values training_features, testing_features, training_target, testing_target = \\ train_test_split(features, tpot_data['target'].values, random_state=None) # Average CV score on the training set was:0.929813743 exported_pipeline = make_pipeline( SelectPercentile(score_func=f_classif, percentile=20), DecisionTreeClassifier(criterion="gini", max_depth=8, min_samples_leaf=5, min_samples_split=5) ) exported_pipeline.fit(training_features, training_target) results = exported_pipeline.predict(testing_features) """

    actual_code = export_pipeline(
        individual,
        tpot_obj.operators,
        tpot_obj._pset,
        pipeline_score=0.929813743,
    )
    assert_equal(expected_code, actual_code)
def test_dict_initialization():
    """Check the default statistics written by ``initialize_stats_dict``.

    A fresh individual from the toolbox must end up with generation,
    crossover count and mutation count all zero, and ``('ROOT',)`` as its
    predecessor tuple.
    """
    tpot_obj = TPOTClassifier()
    tpot_obj._fit_init()

    fresh_ind = tpot_obj._toolbox.individual()
    initialize_stats_dict(fresh_ind)

    # Checked in this fixed order so the first failing key is reported first.
    expected_defaults = (
        ('generation', 0),
        ('crossover_count', 0),
        ('mutation_count', 0),
        ('predecessor', ('ROOT',)),
    )
    for stat_name, stat_value in expected_defaults:
        assert fresh_ind.statistics[stat_name] == stat_value
def test_mate_operator_stats_update():
    """Check the statistics bookkeeping of ``_mate_operator``.

    Two KNeighborsClassifier-over-BernoulliNB individuals (differing in
    ``fit_prior`` and ``p``) receive random crossover/mutation counters and
    are registered in ``tpot_obj.evaluated_individuals_``. Over ten rounds of
    mating, the first offspring must have a crossover count equal to the sum
    of both parents' counts plus one, a mutation count equal to the sum of
    both parents' counts, and both parents' string forms as predecessors.
    """
    tpot_obj = TPOTClassifier()
    tpot_obj._fit_init()

    first = creator.Individual.from_string(
        'KNeighborsClassifier('
        'BernoulliNB(input_matrix, BernoulliNB__alpha=10.0, BernoulliNB__fit_prior=False),'
        'KNeighborsClassifier__n_neighbors=10, '
        'KNeighborsClassifier__p=1, '
        'KNeighborsClassifier__weights=uniform'
        ')',
        tpot_obj._pset
    )
    second = creator.Individual.from_string(
        'KNeighborsClassifier('
        'BernoulliNB(input_matrix, BernoulliNB__alpha=10.0, BernoulliNB__fit_prior=True),'
        'KNeighborsClassifier__n_neighbors=10, '
        'KNeighborsClassifier__p=2, '
        'KNeighborsClassifier__weights=uniform'
        ')',
        tpot_obj._pset
    )
    founders = (first, second)

    for founder in founders:
        initialize_stats_dict(founder)

    # Give both parents arbitrary counters (first parent first, crossover
    # before mutation) so the sums below are not trivially zero.
    for founder in founders:
        for counter in ("crossover_count", "mutation_count"):
            founder.statistics[counter] = random.randint(0, 10)

    # Record both parents as already evaluated, keyed by their string form.
    for founder in founders:
        tpot_obj.evaluated_individuals_[str(founder)] = tpot_obj._combine_individual_stats(
            2, 0.99, founder.statistics
        )

    # Ten rounds of mating.
    for _ in range(10):
        # Only the first offspring is inspected; the second is unpacked so the
        # operator is still required to return exactly two individuals.
        child, _second_child = tpot_obj._mate_operator(first, second)

        stats_a = first.statistics
        stats_b = second.statistics
        child_stats = child.statistics

        expected_crossovers = stats_a['crossover_count'] + stats_b['crossover_count'] + 1
        assert child_stats['crossover_count'] == expected_crossovers

        expected_mutations = stats_a['mutation_count'] + stats_b['mutation_count']
        assert child_stats['mutation_count'] == expected_mutations

        assert child_stats['predecessor'] == (str(first), str(second))

        # The offspring replaces one of the two predecessors, chosen at
        # random; no cloning is needed here.
        if random.random() >= 0.5:
            second = child
        else:
            first = child
def test_pipeline_score_save():
    """Verify the exported source of a pipeline that carries a CV score.

    Builds a SelectPercentile -> DecisionTreeClassifier individual from its
    string representation, exports it through ``export_pipeline`` with
    ``pipeline_score=0.929813743`` and requires the output to equal
    ``expected_code``.
    """
    tpot_obj = TPOTClassifier()
    tpot_obj._fit_init()
    tpot_obj._pbar = tqdm(total=1, disable=True)

    # NOTE(review): this literal is kept exactly as it appears in this file
    # (one line, single spaces). The exported code is presumably multi-line,
    # so its line breaks and indentation may have been lost here -- confirm
    # against the actual export_pipeline output.
    expected_code = """import numpy as np import pandas as pd from sklearn.feature_selection import SelectPercentile, f_classif from sklearn.model_selection import train_test_split from sklearn.pipeline import make_pipeline from sklearn.tree import DecisionTreeClassifier # NOTE: Make sure that the class is labeled 'target' in the data file tpot_data = pd.read_csv('PATH/TO/DATA/FILE', sep='COLUMN_SEPARATOR', dtype=np.float64) features = tpot_data.drop('target', axis=1).values training_features, testing_features, training_target, testing_target = \\ train_test_split(features, tpot_data['target'].values, random_state=None) # Average CV score on the training set was:0.929813743 exported_pipeline = make_pipeline( SelectPercentile(score_func=f_classif, percentile=20), DecisionTreeClassifier(criterion="gini", max_depth=8, min_samples_leaf=5, min_samples_split=5) ) exported_pipeline.fit(training_features, training_target) results = exported_pipeline.predict(testing_features) """

    tree_expr = (
        'DecisionTreeClassifier(SelectPercentile(input_matrix, SelectPercentile__percentile=20),'
        'DecisionTreeClassifier__criterion=gini, DecisionTreeClassifier__max_depth=8,'
        'DecisionTreeClassifier__min_samples_leaf=5, DecisionTreeClassifier__min_samples_split=5)'
    )
    scored_individual = creator.Individual.from_string(tree_expr, tpot_obj._pset)

    generated_code = export_pipeline(
        scored_individual, tpot_obj.operators, tpot_obj._pset, pipeline_score=0.929813743
    )
    assert_equal(expected_code, generated_code)
def test_export_random_ind():
    """Check that a seeded random individual exports to the expected code.

    A ``TPOTClassifier`` is created with ``random_state=39`` and the
    ``"TPOT light"`` configuration; an individual drawn from its toolbox is
    exported with that same random state and compared against
    ``expected_code``.
    """
    tpot_obj = TPOTClassifier(random_state=39, config_dict="TPOT light")
    tpot_obj._fit_init()
    tpot_obj._pbar = tqdm(total=1, disable=True)

    random_individual = tpot_obj._toolbox.individual()

    # NOTE(review): the literal below is reproduced exactly as it appears in
    # this file, i.e. on a single line. The exported code is presumably
    # multi-line, so the line breaks and indentation inside this literal may
    # have been lost -- confirm against the actual export_pipeline output.
    expected_code = """import numpy as np import pandas as pd from sklearn.model_selection import train_test_split from sklearn.naive_bayes import BernoulliNB # NOTE: Make sure that the class is labeled 'target' in the data file tpot_data = pd.read_csv('PATH/TO/DATA/FILE', sep='COLUMN_SEPARATOR', dtype=np.float64) features = tpot_data.drop('target', axis=1).values training_features, testing_features, training_target, testing_target = \\ train_test_split(features, tpot_data['target'].values, random_state=39) exported_pipeline = BernoulliNB(alpha=1.0, fit_prior=False) exported_pipeline.fit(training_features, training_target) results = exported_pipeline.predict(testing_features) """

    exported_code = export_pipeline(
        random_individual,
        tpot_obj.operators,
        tpot_obj._pset,
        random_state=tpot_obj.random_state,
    )
    assert expected_code == exported_code
def test_set_param_recursive_3():
    """Check that ``set_param_recursive`` reaches nested estimators in a complex pipeline.

    A DecisionTreeClassifier fed by CombineDFs (whose first input is itself a
    DecisionTreeClassifier) is compiled to a scikit-learn pipeline. After
    ``set_param_recursive(steps, 'random_state', 42)`` both the estimator
    nested under the first step's ``transformer_list`` and the final step
    must report ``random_state == 42``.
    """
    tpot_obj = TPOTClassifier()
    tpot_obj._fit_init()

    pipeline_string = (
        'DecisionTreeClassifier(CombineDFs('
        'DecisionTreeClassifier(input_matrix, DecisionTreeClassifier__criterion=gini, '
        'DecisionTreeClassifier__max_depth=8, DecisionTreeClassifier__min_samples_leaf=5,'
        'DecisionTreeClassifier__min_samples_split=5),input_matrix) '
        'DecisionTreeClassifier__criterion=gini, DecisionTreeClassifier__max_depth=8, '
        'DecisionTreeClassifier__min_samples_leaf=5, DecisionTreeClassifier__min_samples_split=5)'
    )
    individual = creator.Individual.from_string(pipeline_string, tpot_obj._pset)
    compiled = tpot_obj._toolbox.compile(expr=individual)

    set_param_recursive(compiled.steps, 'random_state', 42)

    # Each entry of .steps / .transformer_list is indexed as [position][1]
    # to get the estimator object itself.
    first_step = compiled.steps[0][1]
    final_step = compiled.steps[1][1]

    # StackingEstimator under the transformer_list of FeatureUnion
    stacked = first_step.transformer_list[0][1]
    assert stacked.estimator.random_state == 42
    assert final_step.random_state == 42
def test_export_random_ind():
    """Verify that exporting a random individual is reproducible under seed 39.

    Uses a ``TPOTClassifier(random_state=39, config_dict="TPOT light")``,
    draws one individual from its toolbox, exports it with the classifier's
    ``random_state`` and requires the result to equal ``expected_code``.
    """
    # NOTE(review): this literal is kept exactly as it appears in this file
    # (one line, single spaces). The exported code is presumably multi-line,
    # so its line breaks and indentation may have been lost here -- confirm
    # against the actual export_pipeline output.
    expected_code = """import numpy as np import pandas as pd from sklearn.model_selection import train_test_split from sklearn.naive_bayes import BernoulliNB # NOTE: Make sure that the class is labeled 'target' in the data file tpot_data = pd.read_csv('PATH/TO/DATA/FILE', sep='COLUMN_SEPARATOR', dtype=np.float64) features = tpot_data.drop('target', axis=1).values training_features, testing_features, training_target, testing_target = \\ train_test_split(features, tpot_data['target'].values, random_state=39) exported_pipeline = BernoulliNB(alpha=1.0, fit_prior=False) exported_pipeline.fit(training_features, training_target) results = exported_pipeline.predict(testing_features) """

    tpot_obj = TPOTClassifier(random_state=39, config_dict="TPOT light")
    tpot_obj._fit_init()
    tpot_obj._pbar = tqdm(total=1, disable=True)

    drawn = tpot_obj._toolbox.individual()
    produced_code = export_pipeline(
        drawn, tpot_obj.operators, tpot_obj._pset, random_state=tpot_obj.random_state
    )
    assert expected_code == produced_code
TPOTSelectPercentile, TPOTSelectPercentile_args = TPOTOperatorClassFactory( test_operator_key_1, classifier_config_dict[test_operator_key_1] ) TPOTSelectFromModel, TPOTSelectFromModel_args = TPOTOperatorClassFactory( test_operator_key_2, classifier_config_dict[test_operator_key_2] ) mnist_data = load_digits() training_features, testing_features, training_target, testing_target = \ train_test_split(mnist_data.data.astype(np.float64), mnist_data.target.astype(np.float64), random_state=42) tpot_obj = TPOTClassifier() tpot_obj._fit_init() tpot_obj_reg = TPOTRegressor() tpot_obj_reg._fit_init() def test_export_random_ind(): """Assert that the TPOTClassifier can generate the same pipeline export with random seed of 39.""" tpot_obj = TPOTClassifier(random_state=39, config_dict="TPOT light") tpot_obj._fit_init() tpot_obj._pbar = tqdm(total=1, disable=True) pipeline = tpot_obj._toolbox.individual() expected_code = """import numpy as np import pandas as pd from sklearn.model_selection import train_test_split from sklearn.naive_bayes import BernoulliNB
TPOTSelectPercentile, TPOTSelectPercentile_args = TPOTOperatorClassFactory( test_operator_key_1, classifier_config_dict[test_operator_key_1] ) TPOTSelectFromModel, TPOTSelectFromModel_args = TPOTOperatorClassFactory( test_operator_key_2, classifier_config_dict[test_operator_key_2] ) digits_data = load_digits() training_features, testing_features, training_target, testing_target = \ train_test_split(digits_data.data.astype(np.float64), digits_data.target.astype(np.float64), random_state=42) tpot_obj = TPOTClassifier() tpot_obj._fit_init() tpot_obj_reg = TPOTRegressor() tpot_obj_reg._fit_init() def test_export_random_ind(): """Assert that the TPOTClassifier can generate the same pipeline export with random seed of 39.""" tpot_obj = TPOTClassifier(random_state=39, config_dict="TPOT light") tpot_obj._fit_init() tpot_obj._pbar = tqdm(total=1, disable=True) pipeline = tpot_obj._toolbox.individual() expected_code = """import numpy as np import pandas as pd from sklearn.model_selection import train_test_split from sklearn.naive_bayes import BernoulliNB