Example #1
def test_mut_operator_stats_update():
    """Asserts that self._random_mutation_operator updates stats as expected."""
    tpot_obj = TPOTClassifier()
    tpot_obj._fit_init()
    ind = creator.Individual.from_string(
        'KNeighborsClassifier('
        'BernoulliNB(input_matrix, BernoulliNB__alpha=10.0, BernoulliNB__fit_prior=False),'
        'KNeighborsClassifier__n_neighbors=10, '
        'KNeighborsClassifier__p=1, '
        'KNeighborsClassifier__weights=uniform'
        ')',
        tpot_obj._pset
    )

    initialize_stats_dict(ind)

    ind.statistics["crossover_count"] = random.randint(0, 10)
    ind.statistics["mutation_count"] = random.randint(0, 10)

    # Register the individual as an evaluated pipeline in tpot_obj.evaluated_individuals_
    tpot_obj.evaluated_individuals_[str(ind)] = tpot_obj._combine_individual_stats(2, 0.99, ind.statistics)

    for _ in range(10):
        offspring, = tpot_obj._random_mutation_operator(ind)
        
        assert offspring.statistics['crossover_count'] == ind.statistics['crossover_count']
        assert offspring.statistics['mutation_count'] == ind.statistics['mutation_count'] + 1
        assert offspring.statistics['predecessor'] == (str(ind),)

        ind = offspring
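These snippets come from TPOT's test suite and omit their module-level imports. A plausible import preamble, inferred from the names used across the examples (a sketch only; the exact module paths follow TPOT's own test file and may differ between TPOT versions), would be:

import random

import numpy as np
from deap import creator
from nose.tools import assert_equal  # or any equivalent assert helper
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split
from tqdm import tqdm

from tpot import TPOTClassifier, TPOTRegressor
from tpot.config.classifier import classifier_config_dict
from tpot.export_utils import export_pipeline, set_param_recursive
from tpot.gp_deap import initialize_stats_dict
from tpot.operator_utils import TPOTOperatorClassFactory

The test_operator_key_1 and test_operator_key_2 names used in the last two examples are keys into classifier_config_dict; they are defined elsewhere in the test module and are not shown in these excerpts.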
Example #2
def test_pipeline_score_save():
    """Assert that the TPOTClassifier can generate a scored pipeline export correctly."""
    tpot_obj = TPOTClassifier()
    tpot_obj._fit_init()
    tpot_obj._pbar = tqdm(total=1, disable=True)
    pipeline_string = (
        'DecisionTreeClassifier(SelectPercentile(input_matrix, SelectPercentile__percentile=20),'
        'DecisionTreeClassifier__criterion=gini, DecisionTreeClassifier__max_depth=8,'
        'DecisionTreeClassifier__min_samples_leaf=5, DecisionTreeClassifier__min_samples_split=5)'
    )
    pipeline = creator.Individual.from_string(pipeline_string, tpot_obj._pset)
    expected_code = """import numpy as np
import pandas as pd
from sklearn.feature_selection import SelectPercentile, f_classif
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline
from sklearn.tree import DecisionTreeClassifier

# NOTE: Make sure that the class is labeled 'target' in the data file
tpot_data = pd.read_csv('PATH/TO/DATA/FILE', sep='COLUMN_SEPARATOR', dtype=np.float64)
features = tpot_data.drop('target', axis=1).values
training_features, testing_features, training_target, testing_target = \\
            train_test_split(features, tpot_data['target'].values, random_state=None)

# Average CV score on the training set was:0.929813743
exported_pipeline = make_pipeline(
    SelectPercentile(score_func=f_classif, percentile=20),
    DecisionTreeClassifier(criterion="gini", max_depth=8, min_samples_leaf=5, min_samples_split=5)
)

exported_pipeline.fit(training_features, training_target)
results = exported_pipeline.predict(testing_features)
"""
    assert_equal(expected_code, export_pipeline(pipeline, tpot_obj.operators, tpot_obj._pset, pipeline_score=0.929813743))
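For comparison, the user-facing route to the same kind of file is TPOTClassifier.export(), which writes the code that export_pipeline() produces to disk. A minimal sketch, assuming a small fitted run on the digits split defined in the later examples:

tpot = TPOTClassifier(generations=2, population_size=10, verbosity=0)
tpot.fit(training_features, training_target)
tpot.export('tpot_exported_pipeline.py')  # writes a script like expected_code above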
Example #3
def test_dict_initialization():
    """Asserts that gp_deap.initialize_stats_dict initializes individual statistics correctly"""
    tpot_obj = TPOTClassifier()
    tpot_obj._fit_init()
    tb = tpot_obj._toolbox

    test_ind = tb.individual()
    initialize_stats_dict(test_ind)

    assert test_ind.statistics['generation'] == 0
    assert test_ind.statistics['crossover_count'] == 0
    assert test_ind.statistics['mutation_count'] == 0
    assert test_ind.statistics['predecessor'] == ('ROOT',)
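The four assertions pin down the baseline that initialize_stats_dict is expected to leave on a fresh individual. An illustrative stand-in with the same observable effect (not TPOT's actual implementation) would be:

def initialize_stats_dict_sketch(individual):
    # Attach the baseline statistics that a never-evaluated individual is
    # expected to carry, matching the assertions in test_dict_initialization.
    individual.statistics = {
        'generation': 0,
        'crossover_count': 0,
        'mutation_count': 0,
        'predecessor': ('ROOT',),
    }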
Example #4
def test_mate_operator_stats_update():
    """Assert that self._mate_operator updates stats as expected."""
    tpot_obj = TPOTClassifier()
    tpot_obj._fit_init()
    ind1 = creator.Individual.from_string(
        'KNeighborsClassifier('
        'BernoulliNB(input_matrix, BernoulliNB__alpha=10.0, BernoulliNB__fit_prior=False),'
        'KNeighborsClassifier__n_neighbors=10, '
        'KNeighborsClassifier__p=1, '
        'KNeighborsClassifier__weights=uniform'
        ')',
        tpot_obj._pset
    )
    ind2 = creator.Individual.from_string(
        'KNeighborsClassifier('
        'BernoulliNB(input_matrix, BernoulliNB__alpha=10.0, BernoulliNB__fit_prior=True),'
        'KNeighborsClassifier__n_neighbors=10, '
        'KNeighborsClassifier__p=2, '
        'KNeighborsClassifier__weights=uniform'
        ')',
        tpot_obj._pset
    )

    initialize_stats_dict(ind1)
    initialize_stats_dict(ind2)

    # Randomize the starting crossover and mutation counts
    ind1.statistics["crossover_count"] = random.randint(0, 10)
    ind1.statistics["mutation_count"] = random.randint(0, 10)
    ind2.statistics["crossover_count"] = random.randint(0, 10)
    ind2.statistics["mutation_count"] = random.randint(0, 10)

    # Register both individuals as evaluated pipelines in tpot_obj.evaluated_individuals_
    tpot_obj.evaluated_individuals_[str(ind1)] = tpot_obj._combine_individual_stats(2, 0.99, ind1.statistics)
    tpot_obj.evaluated_individuals_[str(ind2)] = tpot_obj._combine_individual_stats(2, 0.99, ind2.statistics)

    # Repeat the check 10 times
    for _ in range(10):
        offspring1, offspring2 = tpot_obj._mate_operator(ind1, ind2)

        assert offspring1.statistics['crossover_count'] == ind1.statistics['crossover_count'] + ind2.statistics['crossover_count'] + 1
        assert offspring1.statistics['mutation_count'] == ind1.statistics['mutation_count'] + ind2.statistics['mutation_count']
        assert offspring1.statistics['predecessor'] == (str(ind1), str(ind2))

        # The offspring replaces one of the two parents at random,
        # so there is no need to worry about cloning
        if random.random() < 0.5:
            ind1 = offspring1
        else:
            ind2 = offspring1
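The bookkeeping rule that these assertions encode can be restated as a small helper (a summary of the expected behaviour, not TPOT's internal code):

def expected_crossover_stats(ind1, ind2):
    # Crossover adds one to the combined crossover count, sums the parents'
    # mutation counts, and records both parents as the predecessor tuple.
    return {
        'crossover_count': ind1.statistics['crossover_count'] + ind2.statistics['crossover_count'] + 1,
        'mutation_count': ind1.statistics['mutation_count'] + ind2.statistics['mutation_count'],
        'predecessor': (str(ind1), str(ind2)),
    }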
Example #5
def test_pipeline_score_save():
    """Assert that the TPOTClassifier can generate a scored pipeline export correctly."""
    tpot_obj = TPOTClassifier()
    tpot_obj._fit_init()
    tpot_obj._pbar = tqdm(total=1, disable=True)
    pipeline_string = (
        'DecisionTreeClassifier(SelectPercentile(input_matrix, SelectPercentile__percentile=20),'
        'DecisionTreeClassifier__criterion=gini, DecisionTreeClassifier__max_depth=8,'
        'DecisionTreeClassifier__min_samples_leaf=5, DecisionTreeClassifier__min_samples_split=5)'
    )
    pipeline = creator.Individual.from_string(pipeline_string, tpot_obj._pset)
    expected_code = """import numpy as np
import pandas as pd
from sklearn.feature_selection import SelectPercentile, f_classif
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline
from sklearn.tree import DecisionTreeClassifier

# NOTE: Make sure that the class is labeled 'target' in the data file
tpot_data = pd.read_csv('PATH/TO/DATA/FILE', sep='COLUMN_SEPARATOR', dtype=np.float64)
features = tpot_data.drop('target', axis=1).values
training_features, testing_features, training_target, testing_target = \\
            train_test_split(features, tpot_data['target'].values, random_state=None)

# Average CV score on the training set was:0.929813743
exported_pipeline = make_pipeline(
    SelectPercentile(score_func=f_classif, percentile=20),
    DecisionTreeClassifier(criterion="gini", max_depth=8, min_samples_leaf=5, min_samples_split=5)
)

exported_pipeline.fit(training_features, training_target)
results = exported_pipeline.predict(testing_features)
"""
    assert_equal(
        expected_code,
        export_pipeline(pipeline,
                        tpot_obj.operators,
                        tpot_obj._pset,
                        pipeline_score=0.929813743))
Example #6
def test_export_random_ind():
    """Assert that the TPOTClassifier can generate the same pipeline export with random seed of 39."""
    tpot_obj = TPOTClassifier(random_state=39, config_dict="TPOT light")
    tpot_obj._fit_init()
    tpot_obj._pbar = tqdm(total=1, disable=True)
    pipeline = tpot_obj._toolbox.individual()
    expected_code = """import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import BernoulliNB

# NOTE: Make sure that the class is labeled 'target' in the data file
tpot_data = pd.read_csv('PATH/TO/DATA/FILE', sep='COLUMN_SEPARATOR', dtype=np.float64)
features = tpot_data.drop('target', axis=1).values
training_features, testing_features, training_target, testing_target = \\
            train_test_split(features, tpot_data['target'].values, random_state=39)

exported_pipeline = BernoulliNB(alpha=1.0, fit_prior=False)

exported_pipeline.fit(training_features, training_target)
results = exported_pipeline.predict(testing_features)
"""
    assert expected_code == export_pipeline(pipeline, tpot_obj.operators, tpot_obj._pset, random_state=tpot_obj.random_state)
Example #7
def test_set_param_recursive_3():
    """Assert that set_param_recursive sets \"random_state\" to 42 in nested estimator in StackingEstimator in a complex pipeline."""
    pipeline_string = (
        'DecisionTreeClassifier(CombineDFs('
        'DecisionTreeClassifier(input_matrix, DecisionTreeClassifier__criterion=gini, '
        'DecisionTreeClassifier__max_depth=8, DecisionTreeClassifier__min_samples_leaf=5,'
        'DecisionTreeClassifier__min_samples_split=5),input_matrix) '
        'DecisionTreeClassifier__criterion=gini, DecisionTreeClassifier__max_depth=8, '
        'DecisionTreeClassifier__min_samples_leaf=5, DecisionTreeClassifier__min_samples_split=5)'
    )
    tpot_obj = TPOTClassifier()
    tpot_obj._fit_init()

    deap_pipeline = creator.Individual.from_string(pipeline_string,
                                                   tpot_obj._pset)
    sklearn_pipeline = tpot_obj._toolbox.compile(expr=deap_pipeline)
    set_param_recursive(sklearn_pipeline.steps, 'random_state', 42)

    # StackingEstimator under the transformer_list of FeatureUnion
    assert getattr(
        getattr(sklearn_pipeline.steps[0][1].transformer_list[0][1],
                'estimator'), 'random_state') == 42
    assert getattr(sklearn_pipeline.steps[1][1], 'random_state') == 42
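The same helper also works on a hand-built scikit-learn pipeline with a nested estimator. A minimal sketch, assuming StackingEstimator comes from tpot.builtins and set_param_recursive from tpot.export_utils, with the behaviour inferred from the assertions above:

from sklearn.pipeline import make_pipeline
from sklearn.tree import DecisionTreeClassifier
from tpot.builtins import StackingEstimator
from tpot.export_utils import set_param_recursive

# Push random_state=42 into every step that exposes it, including the
# estimator wrapped inside the StackingEstimator.
pipeline = make_pipeline(
    StackingEstimator(estimator=DecisionTreeClassifier()),
    DecisionTreeClassifier()
)
set_param_recursive(pipeline.steps, 'random_state', 42)

assert pipeline.steps[0][1].estimator.random_state == 42
assert pipeline.steps[1][1].random_state == 42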
Example #8
TPOTSelectPercentile, TPOTSelectPercentile_args = TPOTOperatorClassFactory(
    test_operator_key_1,
    classifier_config_dict[test_operator_key_1]
)

TPOTSelectFromModel, TPOTSelectFromModel_args = TPOTOperatorClassFactory(
    test_operator_key_2,
    classifier_config_dict[test_operator_key_2]
)

mnist_data = load_digits()
training_features, testing_features, training_target, testing_target = \
    train_test_split(mnist_data.data.astype(np.float64), mnist_data.target.astype(np.float64), random_state=42)

tpot_obj = TPOTClassifier()
tpot_obj._fit_init()

tpot_obj_reg = TPOTRegressor()
tpot_obj_reg._fit_init()

def test_export_random_ind():
    """Assert that the TPOTClassifier can generate the same pipeline export with random seed of 39."""
    tpot_obj = TPOTClassifier(random_state=39, config_dict="TPOT light")
    tpot_obj._fit_init()
    tpot_obj._pbar = tqdm(total=1, disable=True)
    pipeline = tpot_obj._toolbox.individual()
    expected_code = """import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import BernoulliNB
Example #9
TPOTSelectPercentile, TPOTSelectPercentile_args = TPOTOperatorClassFactory(
    test_operator_key_1,
    classifier_config_dict[test_operator_key_1]
)

TPOTSelectFromModel, TPOTSelectFromModel_args = TPOTOperatorClassFactory(
    test_operator_key_2,
    classifier_config_dict[test_operator_key_2]
)

digits_data = load_digits()
training_features, testing_features, training_target, testing_target = \
    train_test_split(digits_data.data.astype(np.float64), digits_data.target.astype(np.float64), random_state=42)

tpot_obj = TPOTClassifier()
tpot_obj._fit_init()

tpot_obj_reg = TPOTRegressor()
tpot_obj_reg._fit_init()

def test_export_random_ind():
    """Assert that the TPOTClassifier can generate the same pipeline export with random seed of 39."""
    tpot_obj = TPOTClassifier(random_state=39, config_dict="TPOT light")
    tpot_obj._fit_init()
    tpot_obj._pbar = tqdm(total=1, disable=True)
    pipeline = tpot_obj._toolbox.individual()
    expected_code = """import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import BernoulliNB