Example #1
def test_symbolic_classifier_comparison():
    """Test the classifier comparison example works"""

    X, y = make_classification(n_features=2,
                               n_redundant=0,
                               n_informative=2,
                               random_state=1,
                               n_clusters_per_class=1)
    rng = np.random.RandomState(2)
    X += 2 * rng.uniform(size=X.shape)
    linearly_separable = (X, y)
    datasets = [
        make_moons(noise=0.3, random_state=0),
        make_circles(noise=0.2, factor=0.5, random_state=1),
        linearly_separable
    ]
    scores = []
    for ds in datasets:
        X, y = ds
        X = StandardScaler().fit_transform(X)
        X_train, X_test, y_train, y_test = \
            train_test_split(X, y, test_size=.4, random_state=42)
        clf = SymbolicClassifier(random_state=0)
        clf.fit(X_train, y_train)
        score = clf.score(X_test, y_test)
        scores.append(('%.2f' % score).lstrip('0'))

    assert_equal(scores, ['.95', '.93', '.95'])
Example #2
def test_symbolic_classifier():
    """Check that SymbolicClassifier example works"""

    rng = check_random_state(0)
    cancer = load_breast_cancer()
    perm = rng.permutation(cancer.target.size)
    cancer.data = cancer.data[perm]
    cancer.target = cancer.target[perm]

    est = SymbolicClassifier(parsimony_coefficient=.01,
                             feature_names=cancer.feature_names,
                             random_state=1)
    est.fit(cancer.data[:400], cancer.target[:400])

    y_true = cancer.target[400:]
    y_score = est.predict_proba(cancer.data[400:])[:, 1]
    assert_almost_equal(roc_auc_score(y_true, y_score), 0.96937869822485212)

    dot_data = est._program.export_graphviz()
    expected = ('digraph program {\nnode [style=filled]\n0 [label="sub", '
                'fillcolor="#136ed4"] ;\n1 [label="div", fillcolor="#136ed4"] '
                ';\n2 [label="worst fractal dimension", fillcolor="#60a6f6"] '
                ';\n3 [label="mean concave points", fillcolor="#60a6f6"] '
                ';\n1 -> 3 ;\n1 -> 2 ;\n4 [label="mul", fillcolor="#136ed4"] '
                ';\n5 [label="mean concave points", fillcolor="#60a6f6"] ;\n6 '
                '[label="area error", fillcolor="#60a6f6"] ;\n4 -> 6 ;\n4 -> '
                '5 ;\n0 -> 4 ;\n0 -> 1 ;\n}')
    assert_equal(dot_data, expected)
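The dot string returned by export_graphviz can be rendered directly with the
graphviz Python package; a minimal sketch, assuming graphviz is installed and
using a hypothetical output name:

import graphviz

graph = graphviz.Source(dot_data)  # parse the exported dot string
graph.render('program', format='png', cleanup=True)  # writes program.png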
Example #3
def test_pickle():
    """Check picklability"""

    # Check the regressor
    est = SymbolicRegressor(generations=2, random_state=0)
    est.fit(boston.data[:100, :], boston.target[:100])
    score = est.score(boston.data[500:, :], boston.target[500:])
    pickle_object = pickle.dumps(est)

    est2 = pickle.loads(pickle_object)
    assert_equal(type(est2), est.__class__)
    score2 = est2.score(boston.data[500:, :], boston.target[500:])
    assert_equal(score, score2)

    # Check the transformer
    est = SymbolicTransformer(generations=2, random_state=0)
    est.fit(boston.data[:100, :], boston.target[:100])
    X_new = est.transform(boston.data[500:, :])
    pickle_object = pickle.dumps(est)

    est2 = pickle.loads(pickle_object)
    assert_equal(type(est2), est.__class__)
    X_new2 = est2.transform(boston.data[500:, :])
    assert_array_almost_equal(X_new, X_new2)

    # Check the classifier
    est = SymbolicClassifier(generations=2, random_state=0)
    est.fit(cancer.data[:100, :], cancer.target[:100])
    score = est.score(cancer.data[500:, :], cancer.target[500:])
    pickle_object = pickle.dumps(est)

    est2 = pickle.loads(pickle_object)
    assert_equal(type(est2), est.__class__)
    score2 = est2.score(cancer.data[500:, :], cancer.target[500:])
    assert_equal(score, score2)
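Persistence with joblib works the same way for these fitted estimators; a
minimal sketch, assuming joblib is installed (the file name is hypothetical):

import joblib

joblib.dump(est, 'symbolic_classifier.joblib')    # serialize the fitted model
est2 = joblib.load('symbolic_classifier.joblib')  # restore it later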
Example #4
def test_pipeline():
    """Check that SymbolicRegressor/Transformer can work in a pipeline"""

    # Check the regressor
    est = make_pipeline(StandardScaler(),
                        SymbolicRegressor(population_size=50,
                                          generations=5,
                                          tournament_size=5,
                                          random_state=0))
    est.fit(boston.data, boston.target)
    assert_almost_equal(est.score(boston.data, boston.target), -4.00270923)

    # Check the classifier
    est = make_pipeline(StandardScaler(),
                        SymbolicClassifier(population_size=50,
                                           generations=5,
                                           tournament_size=5,
                                           random_state=0))
    est.fit(cancer.data, cancer.target)
    assert_almost_equal(est.score(cancer.data, cancer.target), 0.934973637961)

    # Check the transformer
    est = make_pipeline(SymbolicTransformer(population_size=50,
                                            hall_of_fame=20,
                                            generations=5,
                                            tournament_size=5,
                                            random_state=0),
                        DecisionTreeRegressor())
    est.fit(boston.data, boston.target)
    assert_almost_equal(est.score(boston.data, boston.target), 1.0)
Example #5
def test_validate_functions():
    """Check that valid functions are accepted & invalid ones raise error"""

    for Symbolic in (SymbolicRegressor, SymbolicTransformer):
        # These should be fine
        est = Symbolic(generations=2,
                       random_state=0,
                       function_set=(add2, sub2, mul2, div2))
        est.fit(boston.data, boston.target)
        est = Symbolic(generations=2,
                       random_state=0,
                       function_set=('add', 'sub', 'mul', div2))
        est.fit(boston.data, boston.target)

        # These should fail
        est = Symbolic(generations=2,
                       random_state=0,
                       function_set=('ni', 'sub', 'mul', div2))
        assert_raises(ValueError, est.fit, boston.data, boston.target)
        est = Symbolic(generations=2,
                       random_state=0,
                       function_set=(7, 'sub', 'mul', div2))
        assert_raises(ValueError, est.fit, boston.data, boston.target)
        est = Symbolic(generations=2, random_state=0, function_set=())
        assert_raises(ValueError, est.fit, boston.data, boston.target)

    # Now for the classifier... These should be fine
    est = SymbolicClassifier(generations=2,
                             random_state=0,
                             function_set=(add2, sub2, mul2, div2))
    est.fit(cancer.data, cancer.target)
    est = SymbolicClassifier(generations=2,
                             random_state=0,
                             function_set=('add', 'sub', 'mul', div2))
    est.fit(cancer.data, cancer.target)

    # These should fail
    est = SymbolicClassifier(generations=2,
                             random_state=0,
                             function_set=('ni', 'sub', 'mul', div2))
    assert_raises(ValueError, est.fit, cancer.data, cancer.target)
    est = SymbolicClassifier(generations=2,
                             random_state=0,
                             function_set=(7, 'sub', 'mul', div2))
    assert_raises(ValueError, est.fit, cancer.data, cancer.target)
    est = SymbolicClassifier(generations=2, random_state=0, function_set=())
    assert_raises(ValueError, est.fit, cancer.data, cancer.target)
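For reference, a custom function entry such as the div2 used above can be
built with make_function; this is a sketch of a protected division, not
necessarily gplearn's internal definition:

import numpy as np
from gplearn.functions import make_function

def _protected_div(x1, x2):
    # Guard against (near-)zero denominators so evolved programs never crash
    with np.errstate(divide='ignore', invalid='ignore'):
        return np.where(np.abs(x2) > 0.001, np.divide(x1, x2), 1.)

div2 = make_function(function=_protected_div, name='div', arity=2)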
Example #6
def test_sample_weight():
    """Check sample_weight param works"""

    # Check constant sample_weight has no effect
    sample_weight = np.ones(boston.target.shape[0])
    est1 = SymbolicRegressor(population_size=100, generations=2,
                             random_state=0)
    est1.fit(boston.data, boston.target)
    est2 = SymbolicRegressor(population_size=100, generations=2,
                             random_state=0)
    est2.fit(boston.data, boston.target, sample_weight=sample_weight)
    # And again with a scaled sample_weight
    est3 = SymbolicRegressor(population_size=100, generations=2,
                             random_state=0)
    est3.fit(boston.data, boston.target, sample_weight=sample_weight * 1.1)

    assert_almost_equal(est1._program.fitness_, est2._program.fitness_)
    assert_almost_equal(est1._program.fitness_, est3._program.fitness_)

    # And again for the classifier
    sample_weight = np.ones(cancer.target.shape[0])
    est1 = SymbolicClassifier(population_size=100, generations=2,
                              random_state=0)
    est1.fit(cancer.data, cancer.target)
    est2 = SymbolicClassifier(population_size=100, generations=2,
                              random_state=0)
    est2.fit(cancer.data, cancer.target, sample_weight=sample_weight)
    # And again with a scaled sample_weight
    est3 = SymbolicClassifier(population_size=100, generations=2,
                              random_state=0)
    est3.fit(cancer.data, cancer.target, sample_weight=sample_weight * 1.1)

    assert_almost_equal(est1._program.fitness_, est2._program.fitness_)
    assert_almost_equal(est1._program.fitness_, est3._program.fitness_)

    # And again for the transformer
    sample_weight = np.ones(boston.target.shape[0])
    est1 = SymbolicTransformer(population_size=100, generations=2,
                               random_state=0)
    est1 = est1.fit_transform(boston.data, boston.target)
    est2 = SymbolicTransformer(population_size=100, generations=2,
                               random_state=0)
    est2 = est2.fit_transform(boston.data, boston.target,
                              sample_weight=sample_weight)

    assert_array_almost_equal(est1, est2)
Example #7
def Symbolic_reg_expr(X, y):
    # Assumed imports: from sympy import symbols, simplify, sympify, diff
    #                  from gplearn.genetic import SymbolicRegressor
    est_gp = SymbolicRegressor(population_size=5000,
                               generations=20,
                               stopping_criteria=0.01,
                               p_crossover=0.7,
                               p_subtree_mutation=0.1,
                               p_hoist_mutation=0.05,
                               p_point_mutation=0.1,
                               max_samples=0.9,
                               verbose=0,
                               parsimony_coefficient=0.01,
                               random_state=0)

    est_gp.fit(X, y)

    sym_expr = str(est_gp._program)

    X0, X1, X2, X3, X4, X5, X6, X7, X8, X9 = symbols(
        'X0 X1 X2 X3 X4 X5 X6 X7 X8 X9')

    converter = {
        'sub': lambda x, y: x - y,
        'div': lambda x, y: x / y,
        'mul': lambda x, y: x * y,
        'add': lambda x, y: x + y,
        'neg': lambda x: -x,
        'pow': lambda x, y: x**y
    }

    sym_reg = simplify(sympify(sym_expr, locals=converter))

    vars_ = [X0, X1, X2, X3, X4, X5, X6, X7, X8, X9]
    gradients_ = [diff(sym_reg, var) for var in vars_]

    return sym_reg, gradients_
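The returned sympy expression and gradients can be turned into fast numeric
callables with lambdify; a hypothetical usage sketch on random data with the
ten features the function assumes:

import numpy as np
from sympy import symbols, lambdify

X = np.random.uniform(-1, 1, size=(200, 10))
y = X[:, 0] ** 2 - X[:, 1]

expr, grads = Symbolic_reg_expr(X, y)
xs = symbols('X0:10')                         # X0 .. X9, matching the names above
f = lambdify(xs, expr, modules='numpy')       # vectorized program
g0 = lambdify(xs, grads[0], modules='numpy')  # gradient w.r.t. X0
y_hat = f(*X.T)                               # evaluate on every row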
Example #8
def main():

    seed = 0
    np.random.seed(seed)
    df = Dataset('ml_project1_data.xlsx').rm_df
    y = df['Response']
    X = df.drop(columns='Response')
    training, testing, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42)

    training['Response'] = y_train
    testing['Response'] = y_test
    pr = Processor(training, testing, seed=0)
    fe = FeatureEngineer(pr.training, pr.unseen, seed=0)
    training = fe.training
    testing = fe.unseen
    est = SymbolicClassifier(generations=200, random_state=0)
    est.fit(training.drop('Response', axis=1), training['Response'])
    assess_generalization_auroc(est, testing, True)
    y_pred = est.predict_proba(testing.drop('Response', axis=1))[:, 1]
    y_true = testing['Response']
    print(profit(y_true, y_pred))

    #+++++++++++++++++ 5) modelling
    #Create Optimizer
    '''
    mlp_param_grid = {'mlpc__hidden_layer_sizes': [(3), (6), (3, 3), (5, 5)],
                      'mlpc__learning_rate_init': [0.001, 0.01]}
    mlp_gscv = bayes_optimization_MLP(fe.training,mlp_param_grid, cv = 5,seed = 0)
    #mlp_gscv.fit(training.loc[:, (training.columns != "Response")].values, training["Response"].values)
    print("Best parameter set: ", mlp_gscv.best_params_)
    # pd.DataFrame.from_dict(mlp_gscv.cv_results_).to_excel("D:\\PipeLines\\project_directory\\data\\mlp_gscv.xlsx")

    #+++++++++++++++++ 6) retraining & assessment of generalization ability
    #auprc,precision, recall = assess_generalization_auroc(mlp_gscv.best_estimator_, testing)
    #print("AUPRC: {:.2f}".format(auprc))
    '''

    plt.show()
Example #9
def test_parallel_train():
    """Check predictions are the same for different n_jobs"""

    # Check the regressor
    ests = [
        SymbolicRegressor(population_size=100,
                          generations=4,
                          n_jobs=n_jobs,
                          random_state=0).fit(boston.data[:100, :],
                                              boston.target[:100])
        for n_jobs in [1, 2, 3, 8, 16]
    ]

    preds = [e.predict(boston.data[500:, :]) for e in ests]
    for pred1, pred2 in zip(preds, preds[1:]):
        assert_array_almost_equal(pred1, pred2)
    lengths = np.array([[gp.length_ for gp in e._programs[-1]] for e in ests])
    for len1, len2 in zip(lengths, lengths[1:]):
        assert_array_almost_equal(len1, len2)

    # Check the transformer
    ests = [
        SymbolicTransformer(population_size=100,
                            hall_of_fame=50,
                            generations=4,
                            n_jobs=n_jobs,
                            random_state=0).fit(boston.data[:100, :],
                                                boston.target[:100])
        for n_jobs in [1, 2, 3, 8, 16]
    ]

    preds = [e.transform(boston.data[500:, :]) for e in ests]
    for pred1, pred2 in zip(preds, preds[1:]):
        assert_array_almost_equal(pred1, pred2)
    lengths = np.array([[gp.length_ for gp in e._programs[-1]] for e in ests])
    for len1, len2 in zip(lengths, lengths[1:]):
        assert_array_almost_equal(len1, len2)

    # Check the classifier
    ests = [
        SymbolicClassifier(population_size=100,
                           generations=4,
                           n_jobs=n_jobs,
                           random_state=0).fit(cancer.data[:100, :],
                                               cancer.target[:100])
        for n_jobs in [1, 2, 3, 8, 16]
    ]

    preds = [e.predict(cancer.data[500:, :]) for e in ests]
    for pred1, pred2 in zip(preds, preds[1:]):
        assert_array_almost_equal(pred1, pred2)
    lengths = np.array([[gp.length_ for gp in e._programs[-1]] for e in ests])
    for len1, len2 in zip(lengths, lengths[1:]):
        assert_array_almost_equal(len1, len2)
Example #10
    def gp(self) -> Pipeline:
        """
        Creates a pipeline for Genetic programming

        :return Pipeline: the pipeline holding the best estimator found by grid search
        """
        pipeline = Pipeline(
            steps=[('scaler', StandardScaler()), ('gp', SymbolicClassifier())])

        params_grid = {'gp__generations': [10, 50, 100]}

        return self.do_grid_search("gp", pipeline, params_grid)
Example #11
def gp_grid_search(training, param_grid, seed, cv=5):
    pipeline = Pipeline([("gp", SymbolicClassifier(random_state=seed))])

    clf_gscv = GridSearchCV(pipeline,
                            param_grid,
                            cv=cv,
                            n_jobs=-1,
                            scoring=make_scorer(profit))
    clf_gscv.fit(training.loc[:, training.columns != "Response"].values,
                 training["Response"].values)

    return clf_gscv
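A hypothetical grid to pass in, using the "gp__" prefix that addresses the
pipeline step defined above:

param_grid = {'gp__population_size': [500, 1000],
              'gp__generations': [10, 20],
              'gp__parsimony_coefficient': [0.001, 0.01]}
# clf_gscv = gp_grid_search(training, param_grid, seed=0)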
Example #12
def main(train_file_name, valid_file_name, test_file_name):

    X_train, y_train, X_validation, y_validation, X_test = \
        load_process_data(train_file_name, valid_file_name, test_file_name)

    gp_classifier = SymbolicClassifier(population_size=20,
                                       generations=65,
                                       tournament_size=3,
                                       const_range=None,
                                       init_depth=(4, 12),
                                       parsimony_coefficient=1e-32,
                                       # parsimony_coefficient=0.0,
                                       # init_method='full',
                                       function_set=('add', 'sub',
                                                     'mul', 'div'),
                                       # make_function(my_sqr, "sqr", arity=2, wrap=False)),
                                       transformer='sigmoid',
                                       #metric=f_beta,
                                       p_crossover=0.85,
                                       p_subtree_mutation=0.04,
                                       p_hoist_mutation=0.01,
                                       p_point_mutation=0.04,
                                       p_point_replace=0.005,
                                       max_samples=1.0,
                                       feature_names=None,
                                       warm_start=False,
                                       low_memory=True,
                                       n_jobs=8,
                                       verbose=1,
                                       random_state=None)

    gp_classifier.fit(X_train, y_train)

    y_val_proba = gp_classifier.predict_proba(X_validation)
    y_train_proba = gp_classifier.predict_proba(X_train)
    best_threshold = get_best_threshold(y_val_proba, y_validation)

    y_train_pred = np.where(y_train_proba[:, 1] > best_threshold, 1, 0)
    y_val_pred = np.where(y_val_proba[:, 1] > best_threshold, 1, 0)
    str_header = "$"*78
    print(str_header)
    print(str_header)
    print('Train accuracy', accuracy_score(y_train, y_train_pred))
    print('Validation accuracy', accuracy_score(y_validation, y_val_pred))

    print('Train precision', precision_score(y_train, y_train_pred))
    print('Validation precision', precision_score(y_validation, y_val_pred))

    print('Train recall', recall_score(y_train, y_train_pred))
    print('Validation recall', recall_score(y_validation, y_val_pred))

    print('Train f-beta score', fbeta_score(y_train, y_train_pred, beta=0.25))
    validation_beta_score = fbeta_score(y_validation, y_val_pred, beta=0.25)
    print(f'Validation f-beta score {validation_beta_score}')
    print(str_header)
    print(str_header)
Example #13
def test_early_stopping():
    """Check that early stopping works"""

    est1 = SymbolicRegressor(stopping_criteria=10, random_state=0)
    est1.fit(boston.data[:400, :], boston.target[:400])
    assert_true(len(est1._programs) == 1)

    est1 = SymbolicTransformer(stopping_criteria=0.5, random_state=0)
    est1.fit(boston.data[:400, :], boston.target[:400])
    assert_true(len(est1._programs) == 1)

    est1 = SymbolicClassifier(stopping_criteria=.9, random_state=0)
    est1.fit(cancer.data[:400, :], cancer.target[:400])
    assert_true(len(est1._programs) == 1)
Example #14
def test_custom_classifier_metrics():
    """Check whether greater_is_better works for SymbolicClassifier."""

    x_data = check_random_state(0).uniform(-1, 1, 100).reshape(50, 2)
    y_true = x_data[:, 0] ** 2 + x_data[:, 1] ** 2
    y_true = (y_true < y_true.mean()).astype(int)

    est_gp = SymbolicClassifier(metric='log loss',
                                stopping_criteria=0.000001,
                                random_state=415,
                                parsimony_coefficient=0.01,
                                init_method='full',
                                init_depth=(2, 4))
    est_gp.fit(x_data, y_true)
    formula = est_gp.__str__()
    expected_formula = 'sub(0.364, mul(add(X0, X0), add(X0, X0)))'
    assert_equal(expected_formula, formula, True)

    def negative_log_loss(y, y_pred, w):
        """Calculate the log loss."""
        eps = 1e-15
        y_pred = np.clip(y_pred, eps, 1 - eps)
        score = y * np.log(y_pred) + (1 - y) * np.log(1 - y_pred)
        return np.average(score, weights=w)

    customized_fitness = make_fitness(negative_log_loss,
                                      greater_is_better=True)

    c_est_gp = SymbolicClassifier(metric=customized_fitness,
                                  stopping_criteria=0.000001,
                                  random_state=415,
                                  parsimony_coefficient=0.01,
                                  init_method='full',
                                  init_depth=(2, 4))
    c_est_gp.fit(x_data, y_true)
    c_formula = c_est_gp.__str__()
    assert_equal(expected_formula, c_formula, True)
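The same make_fitness pattern wraps any metric with the (y, y_pred, w)
signature; a sketch using a scikit-learn scorer (avg_precision is a name
introduced here for illustration):

from sklearn.metrics import average_precision_score
from gplearn.fitness import make_fitness

def _avg_precision(y, y_pred, w):
    # y_pred holds the predicted probability of the positive class
    return average_precision_score(y, y_pred, sample_weight=w)

avg_precision = make_fitness(_avg_precision, greater_is_better=True)
# SymbolicClassifier(metric=avg_precision, ...) would then maximize it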
Example #15
def train():
    est_gp = SymbolicClassifier(population_size=250, generations=20, tournament_size=20,
                                stopping_criteria=0.01, parsimony_coefficient=0.001,
                                p_crossover=0.9, p_subtree_mutation=0.05, p_hoist_mutation=0.0025,
                                p_point_mutation=0.01, p_point_replace=0.0025, verbose=1,
                                max_samples=0.9, feature_names=feature_names)

    est_gp.fit(X_train, y_train)
    print(est_gp._program)
    print(est_gp.score(X_train, y_train))
    print(est_gp.score(X_test, y_test))
Example #16
def test_input_shape():
    """Check changed dimensions cause failure"""

    random_state = check_random_state(415)
    X = np.reshape(random_state.uniform(size=50), (5, 10))
    y = random_state.uniform(size=5)
    yc = np.asarray(['foo', 'bar', 'foo', 'foo', 'bar'])
    X2 = np.reshape(random_state.uniform(size=45), (5, 9))

    # Check the regressor
    est = SymbolicRegressor(generations=2, random_state=0)
    est.fit(X, y)
    assert_raises(ValueError, est.predict, X2)

    # Check the transformer
    est = SymbolicTransformer(generations=2, random_state=0)
    est.fit(X, y)
    assert_raises(ValueError, est.transform, X2)

    # Check the classifier
    est = SymbolicClassifier(generations=2, random_state=0)
    est.fit(X, yc)
    assert_raises(ValueError, est.predict, X2)
Example #17
import pandas as pd
from gplearn.genetic import SymbolicClassifier
from sklearn.metrics import roc_auc_score
from sklearn.utils import shuffle

if __name__ == '__main__':
    # creating data structures
    train_set = pd.read_csv("training.txt", sep=" ")
    test_set = pd.read_csv("test.txt", sep=" ")

    x_train = train_set.drop("Target", axis=1)
    y_train = train_set["Target"]
    x_test = test_set.drop("Target", axis=1)
    y_test = test_set["Target"]

    est = SymbolicClassifier(parsimony_coefficient=.01,
                             stopping_criteria=0.01,
                             feature_names=list(x_train.columns.values),
                             random_state=3)

    est.fit(x_train, y_train)

    y_true = y_test
    y_score = est.predict_proba(x_test)[:, 1]

    print("Accuracy:", roc_auc_score(y_true, y_score), "Program:",
          est._program)
Example #18
    # (Excerpt: inside a loop over rows of the data file.) According to the
    # documentation, a last value of 2 means the tumor is known to be benign
    if int(data[10]) == 2:
        # Not cancerous
        benign.append("benign")
    else:
        # Is cancerous
        benign.append("malignant")

classifier = SymbolicClassifier(
    # Penalizes 'bloat': large programs whose extra size brings only an
    # insignificant increase in fitness
    parsimony_coefficient=.01,
    # The list of attribute names, used in producing the final equation
    feature_names=attributes,
    # Prints the state and fitness of each generation as it runs
    # Note: if commented out, nothing is shown until the final results appear
    verbose=1,
    # Stops the run early once the criterion is met, avoiding long
    # computation time for minimal gain
    stopping_criteria=0.15,
    # Accuracy scales with population size: ~85% at 500, ~90% at 1000,
    # ~95% at 2000
    population_size=2000,
    # The basic arithmetic functions suffice; adding 'log' yields roughly a
    # 5% increase in fitness
    function_set={"mul", "div", "add", "sub", "log"})

# Fit on the first 400 rows, scoring them against their known labels
classifier.fit(values[:400], benign[:400])
# Report the mean accuracy on those rows as a percentage
print("Accuracy: " +
      str(classifier.score(values[:400], benign[:400]) * 100) + "%")
# Print the evolved program (in prefix notation) that achieves this score
print("Function: " + str(classifier._program))
Example #19
def _accuracy(y, y_pred, w):
    # (Reconstructed excerpt) Threshold probabilities into class labels,
    # then score the fraction of labels predicted correctly
    y_pred = np.where(y_pred > 0.5, 1, 0)
    diffs = np.abs(y - y_pred)  # count how many values differ

    return 1 - (np.sum(diffs) / len(y_pred))


accuracy = make_fitness(_accuracy, greater_is_better=True)

est_gp = SymbolicClassifier(
    population_size=1000,
    generations=200,
    stopping_criteria=0.01,
    p_crossover=0.7,
    p_subtree_mutation=0.1,
    p_hoist_mutation=0.05,
    p_point_mutation=0.1,
    max_samples=0.9,
    verbose=1,
    feature_names=('V1', 'V2', 'V3', 'V4', 'V5', 'V6', 'V7', 'V8', 'V9', 'V10',
                   'V11', 'V12', 'V13', 'V14', 'V15', 'V16', 'V17', 'V18',
                   'V19', 'V20', 'V21', 'V22', 'V23', 'V24', 'V25', 'V26',
                   'V27', 'V28', 'V29', 'V30', 'V31', 'V32', 'V33', 'V34',
                   'V35', 'V36'),
    function_set=('add', 'sub', 'mul', 'div'))
est_gp.fit(X_train, y_train)
print('The best individual is:')
print(est_gp)
print('Training set accuracy is %0.2f%%' %
      (100 * est_gp.score(X_train, y_train)))

Predict_value = est_gp.predict(X_test)
count = 0
Example #20
# (Excerpt from a larger dict mapping classifier names to estimators and
# their hyperparameter grids)
        'param_grid': {
            'C': cv_params['clf_svm_c'],
            'kernel': cv_params['clf_svm_kern'],
            'degree': cv_params['clf_svm_deg'],
            'gamma': cv_params['clf_svm_g']}},
    'SGDClassifier': {
        'estimator': SGDClassifier(class_weight='balanced',
                                   penalty=args.clf_sgd_penalty,
                                   random_state=args.random_seed),
        'param_grid': {
            'alpha': cv_params['clf_sgd_a'],
            'loss': cv_params['clf_sgd_loss'],
            'l1_ratio': cv_params['clf_sgd_l1r']}},
    'SymbolicClassifier': {
        'estimator': SymbolicClassifier(parsimony_coefficient='auto',
                                        random_state=args.random_seed,
                                        stopping_criteria=0.01),
        'param_grid': {
            'function_set': cv_params['clf_sym_fs'],
            'generations': cv_params['clf_sym_g'],
            'p_crossover': cv_params['clf_sym_pcr'],
            'p_hoist_mutation': cv_params['clf_sym_phm'],
            'p_point_mutation': cv_params['clf_sym_ppm'],
            'p_point_replace': cv_params['clf_sym_ppr'],
            'p_subtree_mutation': cv_params['clf_sym_psm'],
            'population_size': cv_params['clf_sym_ps'],
            'tournament_size': cv_params['clf_sym_ts']}}}

params_num_xticks = [
    'slr__k',
    'clf__degree',
Example #21
def test_sklearn_classifier_checks():
    """Run the sklearn estimator validation checks on SymbolicClassifier"""

    custom_check_estimator(SymbolicClassifier(population_size=50,
                                              generations=5))
Example #22
def test_parallel_custom_transformer():
    """Regression test for running parallel training with custom transformer"""
    def _sigmoid(x1):
        with np.errstate(over='ignore', under='ignore'):
            return 1 / (1 + np.exp(-x1))

    sigmoid = make_function(function=_sigmoid, name='sig', arity=1)
    est = SymbolicClassifier(generations=2,
                             transformer=sigmoid,
                             random_state=0,
                             n_jobs=2)
    est.fit(cancer.data, cancer.target)
    _ = pickle.dumps(est)

    # Unwrapped functions should fail
    sigmoid = make_function(function=_sigmoid, name='sig', arity=1, wrap=False)
    est = SymbolicClassifier(generations=2,
                             transformer=sigmoid,
                             random_state=0,
                             n_jobs=2)
    est.fit(cancer.data, cancer.target)
    assert_raises(AttributeError, pickle.dumps, est)

    # Single threaded will also fail in non-interactive sessions
    est = SymbolicClassifier(generations=2,
                             transformer=sigmoid,
                             random_state=0)
    est.fit(cancer.data, cancer.target)
    assert_raises(AttributeError, pickle.dumps, est)
Example #23
def test_program_input_validation_classifier():
    """Check that guarded input validation raises errors"""

    # Check too much proba
    est = SymbolicClassifier(p_point_mutation=.5)
    assert_raises(ValueError, est.fit, cancer.data, cancer.target)

    # Check invalid init_method
    est = SymbolicClassifier(init_method='ni')
    assert_raises(ValueError, est.fit, cancer.data, cancer.target)

    # Check invalid const_ranges
    est = SymbolicClassifier(const_range=2)
    assert_raises(ValueError, est.fit, cancer.data, cancer.target)
    est = SymbolicClassifier(const_range=[2, 2])
    assert_raises(ValueError, est.fit, cancer.data, cancer.target)
    est = SymbolicClassifier(const_range=(2, 2, 2))
    assert_raises(ValueError, est.fit, cancer.data, cancer.target)
    est = SymbolicClassifier(const_range='ni')
    assert_raises(ValueError, est.fit, cancer.data, cancer.target)
    # And check acceptable, but strange, representations of const_range
    est = SymbolicClassifier(generations=2, const_range=(2, 2))
    est.fit(cancer.data, cancer.target)
    est = SymbolicClassifier(generations=2, const_range=None)
    est.fit(cancer.data, cancer.target)
    est = SymbolicClassifier(generations=2, const_range=(4, 2))
    est.fit(cancer.data, cancer.target)

    # Check invalid init_depth
    est = SymbolicClassifier(init_depth=2)
    assert_raises(ValueError, est.fit, cancer.data, cancer.target)
    est = SymbolicClassifier(init_depth=[2, 2])
    assert_raises(ValueError, est.fit, cancer.data, cancer.target)
    est = SymbolicClassifier(init_depth=(2, 2, 2))
    assert_raises(ValueError, est.fit, cancer.data, cancer.target)
    est = SymbolicClassifier(init_depth='ni')
    assert_raises(ValueError, est.fit, cancer.data, cancer.target)
    est = SymbolicClassifier(init_depth=(4, 2))
    assert_raises(ValueError, est.fit, cancer.data, cancer.target)
    # And check acceptable, but strange, representations of init_depth
    est = SymbolicClassifier(generations=2, init_depth=(2, 2))
    est.fit(cancer.data, cancer.target)

    # Check classifier metrics
    for m in ['log loss']:
        est = SymbolicClassifier(generations=2, metric=m)
        est.fit(cancer.data, cancer.target)
    # And check a fake one
    est = SymbolicClassifier(generations=2, metric='the larch')
    assert_raises(ValueError, est.fit, cancer.data, cancer.target)

    # Check classifier transformers
    for t in ['sigmoid']:
        est = SymbolicClassifier(generations=2, transformer=t)
        est.fit(cancer.data, cancer.target)
    # And check an incompatible one with wrong arity
    est = SymbolicClassifier(generations=2, transformer=sub2)
    assert_raises(ValueError, est.fit, cancer.data, cancer.target)
    # And check a fake one
    est = SymbolicClassifier(generations=2, transformer='the larch')
    assert_raises(ValueError, est.fit, cancer.data, cancer.target)
Example #24
def test_print_overloading_estimator():
    """Check that printing a fitted estimator results in 'pretty' output"""

    random_state = check_random_state(415)
    X = np.reshape(random_state.uniform(size=50), (5, 10))
    y = random_state.uniform(size=5)

    # Check the regressor
    est = SymbolicRegressor(generations=2, random_state=0)

    # Unfitted
    orig_stdout = sys.stdout
    try:
        out = StringIO()
        sys.stdout = out
        print(est)
        output_unfitted = out.getvalue().strip()
    finally:
        sys.stdout = orig_stdout

    # Fitted
    est.fit(X, y)
    orig_stdout = sys.stdout
    try:
        out = StringIO()
        sys.stdout = out
        print(est)
        output_fitted = out.getvalue().strip()
    finally:
        sys.stdout = orig_stdout

    orig_stdout = sys.stdout
    try:
        out = StringIO()
        sys.stdout = out
        print(est._program)
        output_program = out.getvalue().strip()
    finally:
        sys.stdout = orig_stdout

    assert_true(output_unfitted != output_fitted)
    assert_true(output_unfitted == est.__repr__())
    assert_true(output_fitted == output_program)

    # Check the transformer
    est = SymbolicTransformer(generations=2, random_state=0)

    # Unfitted
    orig_stdout = sys.stdout
    try:
        out = StringIO()
        sys.stdout = out
        print(est)
        output_unfitted = out.getvalue().strip()
    finally:
        sys.stdout = orig_stdout

    # Fitted
    est.fit(X, y)
    orig_stdout = sys.stdout
    try:
        out = StringIO()
        sys.stdout = out
        print(est)
        output_fitted = out.getvalue().strip()
    finally:
        sys.stdout = orig_stdout

    orig_stdout = sys.stdout
    try:
        out = StringIO()
        sys.stdout = out
        output = str([gp.__str__() for gp in est])
        print(output.replace("',", ",\n").replace("'", ""))
        output_program = out.getvalue().strip()
    finally:
        sys.stdout = orig_stdout

    assert_true(output_unfitted != output_fitted)
    assert_true(output_unfitted == est.__repr__())
    assert_true(output_fitted == output_program)

    # Check the classifier
    y = (y > .5).astype(int)
    est = SymbolicClassifier(generations=2, random_state=0)

    # Unfitted
    orig_stdout = sys.stdout
    try:
        out = StringIO()
        sys.stdout = out
        print(est)
        output_unfitted = out.getvalue().strip()
    finally:
        sys.stdout = orig_stdout

    # Fitted
    est.fit(X, y)
    orig_stdout = sys.stdout
    try:
        out = StringIO()
        sys.stdout = out
        print(est)
        output_fitted = out.getvalue().strip()
    finally:
        sys.stdout = orig_stdout

    orig_stdout = sys.stdout
    try:
        out = StringIO()
        sys.stdout = out
        print(est._program)
        output_program = out.getvalue().strip()
    finally:
        sys.stdout = orig_stdout

    assert_true(output_unfitted != output_fitted)
    assert_true(output_unfitted == est.__repr__())
    assert_true(output_fitted == output_program)
Example #25
    # (Excerpt: inside a loop over rows of the data file)
    if len(data) > 9:
        temp = []
        for i in range(1, 10):
            x = int(data[i]) if data[i] != "?" else -1
            temp.append(x)

        values.append(temp)

        if int(data[10]) == 2:
            alive.append("benign")
        else:
            alive.append("malignant")

est = SymbolicClassifier(parsimony_coefficient=.01,
                         feature_names=attributes,
                         random_state=10000,
                         verbose=1,
                         stopping_criteria=0.15,
                         population_size=2000,
                         function_set={"mul", "div", "add", "sub", "log"})

est.fit(values[:400], alive[:400])
print("Accuracy: " + est.score(values[:400], alive[:400]).__str__())
# noinspection PyProtectedMember
# print("Function: " + str(est._program))

# noinspection PyProtectedMember
# graph = pydotplus.graphviz.graph_from_dot_data(est._program.export_graphviz())
# Image(graph.create_png())
# graph.write_png("dtree.png")
Example #26
def test_sklearn_customized_checks():
    """Run custom binary estimator validation checks on SymbolicClassifier"""

    rewritten_check_estimator(SymbolicClassifier(population_size=50,
                                                 generations=5))
Example #27
# https://gplearn.readthedocs.io/en/stable/reference.html#symbolic-classifier
sc = SymbolicClassifier(
    population_size=2000,
    generations=20,
    tournament_size=25,
    const_range=(-1.5, 1.5),
    #     init_depth=(10, 20),
    #     init_method='full',
    init_method='half and half',
    function_set=(
        'add', 'sub', 'mul', 'div', 'cos', 'log'
        #         'sin', 'min', 'max', 'sqrt',  #'neg', 'tan'
    ),
    transformer='sigmoid',

    #     metric=mf_wf, stopping_criteria=2.0,
    parsimony_coefficient=0.0001,
    p_crossover=0.7,
    p_subtree_mutation=0.2,
    p_hoist_mutation=0.00,
    p_point_mutation=0.1,
    p_point_replace=0.05,
    max_samples=.9,
    #     feature_names=train_x.columns,
    low_memory=True,
    n_jobs=-1,
    verbose=1,
    random_state=None)
pipeline_gp = make_pipeline(sc)
param_grid_gp = {}
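The empty grid above can be filled in and searched; a hypothetical sketch
(make_pipeline names the step after the lower-cased class, and train_x /
train_y are assumed from the surrounding context):

from sklearn.model_selection import GridSearchCV

param_grid_gp = {'symbolicclassifier__parsimony_coefficient': [0.0001, 0.001],
                 'symbolicclassifier__p_subtree_mutation': [0.1, 0.2]}
search = GridSearchCV(pipeline_gp, param_grid_gp, cv=3, scoring='roc_auc')
# search.fit(train_x, train_y)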