示例#1
0
文件: tests.py 项目: jaikumarm/tpot
def test_multinomial_nb():
    """Check the TPOT MultinomialNB operator against sklearn's MultinomialNB.

    Both are run with alpha=1.0; the guesses TPOT records for the rows of the
    'testing' group must equal the predictions of the sklearn model fitted on
    the training fixtures.
    """
    tpot_output = TPOT()._multinomial_nb(training_testing_data, 1.0)
    testing_mask = tpot_output['group'] == 'testing'
    tpot_guesses = tpot_output[testing_mask]['guess'].values

    reference = MultinomialNB(alpha=1.0, fit_prior=True)
    reference.fit(training_features, training_classes)
    expected = reference.predict(testing_features)

    assert np.array_equal(tpot_guesses, expected)
示例#2
0
文件: tests.py 项目: jaikumarm/tpot
def test_variance_threshold():
    """Check TPOT's _variance_threshold against sklearn's VarianceThreshold.

    The sklearn selector (threshold=0) is fitted on the training rows with the
    bookkeeping columns removed; the columns it keeps, followed by the
    bookkeeping columns, are the ones the TPOT operator is expected to return.
    """
    tpot_obj = TPOT()
    meta_columns = ['class', 'group', 'guess']

    # Feature-only view of the training rows.
    is_training = training_testing_data['group'] == 'training'
    training_features = training_testing_data.loc[is_training].drop(meta_columns, axis=1)

    selector = VarianceThreshold(threshold=0)
    selector.fit(training_features)
    kept_positions = selector.get_support(True)  # integer indices, not a boolean mask
    expected_columns = list(training_features.iloc[:, kept_positions].columns) + meta_columns

    tpot_selected = tpot_obj._variance_threshold(training_testing_data, 0)
    expected = training_testing_data[expected_columns]

    assert np.array_equal(tpot_selected, expected)
示例#3
0
文件: tests.py 项目: jaikumarm/tpot
def test_logistic_regression():
    """Check the TPOT logistic regression operator against sklearn's LogisticRegression.

    TPOT is called with the arguments (5., 0, True); the sklearn reference is
    built with C=5., penalty='l1', dual=False and random_state=42.
    """
    tpot_output = TPOT()._logistic_regression(training_testing_data, 5., 0, True)
    testing_mask = tpot_output['group'] == 'testing'
    tpot_guesses = tpot_output[testing_mask]['guess'].values

    reference = LogisticRegression(C=5., penalty='l1', dual=False, random_state=42)
    reference.fit(training_features, training_classes)
    expected = reference.predict(testing_features)

    assert np.array_equal(tpot_guesses, expected)
示例#4
0
文件: tests.py 项目: jaikumarm/tpot
def test_knnc_2():
    """Check the TPOT k-nearest-neighbour operator when it is given 0 neighbours.

    The sklearn reference uses n_neighbors=2 and weights='uniform', which is
    what this test expects TPOT to fall back to for the arguments (0, 0).
    """
    tpot_output = TPOT()._knnc(training_testing_data, 0, 0)
    testing_rows = tpot_output[tpot_output['group'] == 'testing']
    tpot_guesses = testing_rows['guess'].values

    reference = KNeighborsClassifier(n_neighbors=2, weights='uniform')
    reference.fit(training_features, training_classes)
    expected = reference.predict(testing_features)

    assert np.array_equal(tpot_guesses, expected)
示例#5
0
文件: tests.py 项目: jaikumarm/tpot
def test_extra_trees_3():
    """Check the TPOT ExtraTreesClassifier operator when min_weight exceeds 0.5.

    TPOT receives a min_weight of 0.6; the sklearn reference is built with
    min_weight_fraction_leaf=0.5, the value this test expects TPOT to use.
    """
    tpot_output = TPOT()._extra_trees(training_testing_data, 0, 1., 0.6)
    testing_rows = tpot_output[tpot_output['group'] == 'testing']
    tpot_guesses = testing_rows['guess'].values

    reference = ExtraTreesClassifier(
        n_estimators=500,
        random_state=42,
        max_features=1.,
        min_weight_fraction_leaf=0.5,
        criterion='gini',
    )
    reference.fit(training_features, training_classes)
    expected = reference.predict(testing_features)

    assert np.array_equal(tpot_guesses, expected)
示例#6
0
文件: tests.py 项目: jaikumarm/tpot
def test_random_forest_2():
    """Check the TPOT random forest operator when min_weight exceeds 0.5.

    TPOT receives 0.6; the sklearn reference is built with
    min_weight_fraction_leaf=0.5, the value this test expects TPOT to use.
    """
    tpot_output = TPOT()._random_forest(training_testing_data, 0.6)
    testing_mask = tpot_output['group'] == 'testing'
    tpot_guesses = tpot_output[testing_mask]['guess'].values

    reference = RandomForestClassifier(
        n_estimators=500,
        min_weight_fraction_leaf=0.5,
        random_state=42,
        n_jobs=-1,
    )
    reference.fit(training_features, training_classes)
    expected = reference.predict(testing_features)

    assert np.array_equal(tpot_guesses, expected)
示例#7
0
def test_combine_dfs_2():
    """Check that _combine_dfs of two identical frames yields that same frame."""
    tpot_obj = TPOT()

    def make_frame():
        # Each call builds an independent frame with the same content.
        return pd.DataFrame({'a': range(10), 'b': range(10, 20)})

    left = make_frame()
    right = make_frame()
    expected = make_frame()

    merged = tpot_obj._combine_dfs(left, right)
    assert merged.equals(expected)
示例#8
0
文件: tests.py 项目: jaikumarm/tpot
def test_bernoulli_nb():
    """Check the TPOT BernoulliNB operator against sklearn's BernoulliNB.

    Both are run with alpha=1.0 and binarize=0.0; TPOT's guesses for the
    'testing' rows must match the sklearn predictions.
    """
    tpot_output = TPOT()._bernoulli_nb(training_testing_data, 1.0, 0.0)
    testing_rows = tpot_output[tpot_output['group'] == 'testing']
    tpot_guesses = testing_rows['guess'].values

    reference = BernoulliNB(alpha=1.0, binarize=0.0, fit_prior=True)
    reference.fit(training_features, training_classes)
    expected = reference.predict(testing_features)

    assert np.array_equal(tpot_guesses, expected)
示例#9
0
def test_gradient_boosting_2():
    """Check the TPOT GradientBoostingClassifier operator when max_depth < 1.

    TPOT receives a depth of 0; the sklearn reference is built with
    max_depth=1, the value this test expects TPOT to use instead.
    """
    tpot_output = TPOT()._gradient_boosting(training_testing_data, 1.0, 0)
    testing_mask = tpot_output['group'] == 'testing'
    tpot_guesses = tpot_output[testing_mask]['guess'].values

    reference = GradientBoostingClassifier(
        learning_rate=1.0,
        max_depth=1,
        n_estimators=500,
        random_state=42,
    )
    reference.fit(training_features, training_classes)
    expected = reference.predict(testing_features)

    assert np.array_equal(tpot_guesses, expected)
示例#10
0
def test_passive_aggressive_2():
    """Check the TPOT PassiveAggressiveClassifier operator when C == 0.0.

    The sklearn reference is built with C=0.0001, the value this test expects
    TPOT to substitute for a C of 0.0.
    """
    tpot_output = TPOT()._passive_aggressive(training_testing_data, 0.0, 0)
    testing_rows = tpot_output[tpot_output['group'] == 'testing']
    tpot_guesses = testing_rows['guess'].values

    reference = PassiveAggressiveClassifier(
        C=0.0001,
        loss='hinge',
        fit_intercept=True,
        random_state=42,
    )
    reference.fit(training_features, training_classes)
    expected = reference.predict(testing_features)

    assert np.array_equal(tpot_guesses, expected)
示例#11
0
def test_linear_svc():
    """Check the TPOT LinearSVC operator against sklearn's LinearSVC.

    TPOT is called with the arguments (1.0, 0, True); the sklearn reference
    uses C=1.0, loss='hinge', fit_intercept=True and random_state=42.
    """
    tpot_output = TPOT()._linear_svc(training_testing_data, 1.0, 0, True)
    testing_mask = tpot_output['group'] == 'testing'
    tpot_guesses = tpot_output[testing_mask]['guess'].values

    reference = LinearSVC(C=1.0, loss='hinge', fit_intercept=True, random_state=42)
    reference.fit(training_features, training_classes)
    expected = reference.predict(testing_features)

    assert np.array_equal(tpot_guesses, expected)
示例#12
0
def test_svc_2():
    """Check the TPOT SVC operator against sklearn's SVC when C < 0.0001.

    TPOT receives C=0.00001; the sklearn reference is built with C=0.0001,
    the value this test expects TPOT to use instead.
    """
    tpot_output = TPOT()._svc(training_testing_data, 0.00001)
    testing_rows = tpot_output[tpot_output['group'] == 'testing']
    tpot_guesses = testing_rows['guess'].values

    reference = SVC(C=0.0001, random_state=42)
    reference.fit(training_features, training_classes)
    expected = reference.predict(testing_features)

    assert np.array_equal(tpot_guesses, expected)
示例#13
0
def test_random_forest_3():
    """Check the TPOT random forest operator when max_features exceeds the feature count.

    TPOT receives 100; the sklearn reference is built with max_features=64,
    the value this test expects TPOT to use instead.
    """
    tpot_output = TPOT()._random_forest(training_testing_data, 100)
    testing_mask = tpot_output['group'] == 'testing'
    tpot_guesses = tpot_output[testing_mask]['guess'].values

    reference = RandomForestClassifier(
        n_estimators=500,
        max_features=64,
        random_state=42,
        n_jobs=-1,
    )
    reference.fit(training_features, training_classes)
    expected = reference.predict(testing_features)

    assert np.array_equal(tpot_guesses, expected)
示例#14
0
def test_decision_tree_3():
    """Check the TPOT decision tree operator when max_features exceeds the feature count.

    TPOT is called with the arguments (100, 0); the sklearn reference is built
    with max_features=64 and max_depth=None, the values this test expects TPOT
    to use instead.
    """
    tpot_output = TPOT()._decision_tree(training_testing_data, 100, 0)
    testing_rows = tpot_output[tpot_output['group'] == 'testing']
    tpot_guesses = testing_rows['guess'].values

    reference = DecisionTreeClassifier(max_features=64, max_depth=None, random_state=42)
    reference.fit(training_features, training_classes)
    expected = reference.predict(testing_features)

    assert np.array_equal(tpot_guesses, expected)
示例#15
0
文件: tests.py 项目: jaikumarm/tpot
def test_linear_svc_2():
    """Check the TPOT LinearSVC operator when C == 0.0.

    The sklearn reference is built with C=0.0001 (plus penalty='l1' and
    dual=False), the value this test expects TPOT to substitute for a C of 0.0.
    """
    tpot_output = TPOT()._linear_svc(training_testing_data, 0.0, 0, True)
    testing_mask = tpot_output['group'] == 'testing'
    tpot_guesses = tpot_output[testing_mask]['guess'].values

    reference = LinearSVC(C=0.0001, penalty='l1', dual=False, random_state=42)
    reference.fit(training_features, training_classes)
    expected = reference.predict(testing_features)

    assert np.array_equal(tpot_guesses, expected)
示例#16
0
文件: tests.py 项目: jaikumarm/tpot
def test_ada_boost_2():
    """Check the TPOT AdaBoostClassifier operator when learning_rate == 0.0.

    The sklearn reference is built with learning_rate=0.0001, the value this
    test expects TPOT to substitute for a learning rate of 0.0.
    """
    tpot_output = TPOT()._ada_boost(training_testing_data, 0.0)
    testing_rows = tpot_output[tpot_output['group'] == 'testing']
    tpot_guesses = testing_rows['guess'].values

    reference = AdaBoostClassifier(n_estimators=500, random_state=42, learning_rate=0.0001)
    reference.fit(training_features, training_classes)
    expected = reference.predict(testing_features)

    assert np.array_equal(tpot_guesses, expected)
示例#17
0
文件: tests.py 项目: jaikumarm/tpot
def test_decision_tree_3():
    """Check the TPOT decision tree operator when min_weight exceeds 0.5.

    TPOT receives 0.6; the sklearn reference is built with
    min_weight_fraction_leaf=0.5, the value this test expects TPOT to use.
    """
    tpot_output = TPOT()._decision_tree(training_testing_data, 0.6)
    testing_mask = tpot_output['group'] == 'testing'
    tpot_guesses = tpot_output[testing_mask]['guess'].values

    reference = DecisionTreeClassifier(min_weight_fraction_leaf=0.5, random_state=42)
    reference.fit(training_features, training_classes)
    expected = reference.predict(testing_features)

    assert np.array_equal(tpot_guesses, expected)
示例#18
0
文件: tests.py 项目: jaikumarm/tpot
def test_gaussian_nb():
    """Check the TPOT GaussianNB operator against a default sklearn GaussianNB."""
    tpot_output = TPOT()._gaussian_nb(training_testing_data)
    testing_rows = tpot_output[tpot_output['group'] == 'testing']
    tpot_guesses = testing_rows['guess'].values

    reference = GaussianNB()
    reference.fit(training_features, training_classes)
    expected = reference.predict(testing_features)

    assert np.array_equal(tpot_guesses, expected)
示例#19
0
def test_get_params():
    """Assert that get_params returns the exact dictionary of parameters used by TPOT.

    The expected dictionary is the defaults declared on ``TPOT.__init__``
    overridden by the keyword arguments passed to the constructor here.
    """
    kwargs = {'population_size': 500, 'generations': 1000, 'verbosity': 1}

    tpot_obj = TPOT(**kwargs)

    # inspect.getargspec was removed in Python 3.11; use getfullargspec where
    # it exists and only fall back to getargspec on interpreters without it.
    try:
        get_spec = inspect.getfullargspec
    except AttributeError:
        get_spec = inspect.getargspec
    initializer = get_spec(TPOT.__init__)

    # Defaults belong to the *trailing* arguments, so align from the end
    # rather than assuming every argument after ``self`` has a default.
    defaults = initializer.defaults or ()
    defaulted_names = initializer.args[len(initializer.args) - len(defaults):]

    # Get default parameters of TPOT and merge with our specified parameters
    default_kwargs = dict(zip(defaulted_names, defaults))
    default_kwargs.update(kwargs)

    assert tpot_obj.get_params() == default_kwargs
示例#20
0
文件: tests.py 项目: slitayem/tpot
def test_df_feature_selection():
    """Check the columns returned by _dt_feature_selection when called with 10.

    The resulting frame's columns must equal, in order, the hard-coded list of
    feature names followed by the 'class', 'group' and 'guess' columns.
    """
    tpot_obj = TPOT()

    expected_columns = [
        '00002', '00013', '00020', '00021', '00026', '00042', '00043', '00058',
        '00061', 'class', 'group', 'guess'
    ]

    selected = tpot_obj._dt_feature_selection(training_testing_data, 10)
    assert np.array_equal(selected.columns.values, expected_columns)
示例#21
0
def test_predict_2():
    """Check that TPOT.predict returns a result of shape (num_testing_rows,).

    The TPOT object is primed by hand with the training fixtures and a
    single-operator logistic regression pipeline.
    """
    pipeline_source = '_logistic_regression(input_df, 1.0, 0, True)'

    tpot_obj = TPOT()
    tpot_obj._training_classes = training_classes
    tpot_obj._training_features = training_features
    tpot_obj._optimized_pipeline = creator.Individual.from_string(
        pipeline_source, tpot_obj._pset)

    predictions = tpot_obj.predict(testing_features)
    expected_shape = (testing_features.shape[0], )

    assert predictions.shape == expected_shape
示例#22
0
def test_train_model_and_predict():
    """Ensure that the TPOT train_model_and_predict returns the input dataframe when it has only 3 columns i.e. class, group, guess"""

    tpot_obj = TPOT()

    # DataFrame.ix was removed in pandas 1.0. The slice ``-3:`` is positional
    # (last three columns), so .iloc is the direct replacement.
    expected = training_testing_data.iloc[:, -3:]
    returned = tpot_obj._train_model_and_predict(
        training_testing_data.iloc[:, -3:],
        LinearSVC,
        C=5.,
        penalty='l1',
        dual=False)

    assert np.array_equal(expected, returned)
示例#23
0
def test_static_models():
    '''
        Ensure that the TPOT static classifiers outputs the same as the sklearn output

        Each entry of ``models`` is a tuple of (TPOT operator, sklearn class,
        keyword arguments for the TPOT operator, keyword arguments for the
        sklearn class). The sklearn model is built with random_state=42 when
        its constructor accepts that argument.
    '''

    tpot_obj = TPOT()
    models = [
        (tpot_obj.decision_tree, DecisionTreeClassifier,
         {'max_features': 0, 'max_depth': 0},
         {'max_features': 'auto', 'max_depth': None}),
        (tpot_obj.svc, SVC,
         {'C': 0.0001},
         {'C': 0.0001}),
        (tpot_obj.random_forest, RandomForestClassifier,
         {'n_estimators': 100, 'max_features': 0},
         {'n_estimators': 100, 'max_features': 'auto', 'n_jobs': -1}),
        (tpot_obj.logistic_regression, LogisticRegression,
         {'C': 0.0001},
         {'C': 0.0001}),
        (tpot_obj.knnc, KNeighborsClassifier,
         {'n_neighbors': 100},
         {'n_neighbors': 100}),
    ]
    for model, sklearn_model, model_params, sklearn_params in models:

        result = model(training_testing_data, **model_params)

        # Only the constructor is guarded: a TypeError here means the
        # estimator does not take random_state. A TypeError raised by fit()
        # is a genuine failure and must not trigger the fallback.
        try:
            sklearn_model_obj = sklearn_model(random_state=42,
                                              **sklearn_params)
        except TypeError:
            sklearn_model_obj = sklearn_model(**sklearn_params)
        sklearn_model_obj.fit(training_features, training_classes)

        result = result[result['group'] == 'testing']

        assert np.array_equal(
            result['guess'].values,
            sklearn_model_obj.predict(testing_features)
        ), "Model {} failed".format(str(model))
示例#24
0
文件: tests.py 项目: jaikumarm/tpot
def test_unroll_nested():
    """Check unroll_nested_fuction_calls on a pipeline made of a single operator.

    The one call is expected to unroll into a single row: the result name,
    the operator name, and then each argument as a string.
    """
    tpot_obj = TPOT()

    pipeline_source = '_logistic_regression(input_df, 1.0, 0, True)'
    expected_list = [
        ['result1', '_logistic_regression', 'input_df', '1.0', '0', 'True'],
    ]

    pipeline = creator.Individual.from_string(pipeline_source, tpot_obj._pset)
    pipeline_list = unroll_nested_fuction_calls(pipeline)

    assert expected_list == pipeline_list
示例#25
0
def test_predict_2():
    """Check that TPOT.predict returns a result of shape (num_testing_rows,).

    A one-step DecisionTreeClassifier pipeline is parsed, compiled with the
    TPOT toolbox, and fitted on the training fixtures before predicting.
    """
    tpot_obj = TPOT()

    pipeline_source = 'DecisionTreeClassifier(input_matrix)'
    tpot_obj._optimized_pipeline = creator.Individual.from_string(
        pipeline_source, tpot_obj._pset)

    compiled = tpot_obj._toolbox.compile(expr=tpot_obj._optimized_pipeline)
    tpot_obj._fitted_pipeline = compiled
    tpot_obj._fitted_pipeline.fit(training_features, training_classes)

    predictions = tpot_obj.predict(testing_features)
    expected_shape = (testing_features.shape[0], )

    assert predictions.shape == expected_shape
示例#26
0
def test_replace_function_calls_2():
    """Smoke-test replace_function_calls on every DataFrame primitive.

    For each primitive registered under pd.DataFrame, a one-row pipeline is
    built from the primitive's name and the first terminal value available for
    each of its argument types. The test passes if no call raises.
    """
    tpot_obj = TPOT()
    pset = tpot_obj._pset

    for primitive in pset.primitives[pd.DataFrame]:
        argument_values = [pset.terminals[arg_type][0].value
                           for arg_type in primitive.args]
        pipeline_row = ['result1', primitive.name] + argument_values

        replace_function_calls([pipeline_row])
示例#27
0
文件: tests.py 项目: jaikumarm/tpot
def test_rfe_2():
    """Ensure that the TPOT RFE outputs the same result as the sklearn rfe when num_features>no. of features in the dataframe """
    tpot_obj = TPOT()

    non_feature_columns = ['class', 'group', 'guess']
    training_features = training_testing_data.loc[training_testing_data['group'] == 'training'].drop(non_feature_columns, axis=1)
    estimator = LinearSVC()
    # n_features_to_select is passed by keyword: newer scikit-learn releases
    # make every RFE argument after the estimator keyword-only.
    rfe = RFE(estimator, n_features_to_select=100, step=0.1)
    rfe.fit(training_features, training_classes)
    mask = rfe.get_support(True)  # integer indices of the selected features
    mask_cols = list(training_features.iloc[:, mask].columns) + non_feature_columns

    assert np.array_equal(training_testing_data[mask_cols], tpot_obj._rfe(training_testing_data, 64, 0.1))
示例#28
0
文件: tests.py 项目: jaikumarm/tpot
def test_unroll_nested_2():
    """Check unroll_nested_fuction_calls on a pipeline with a nested call.

    The inner _select_percentile call is expected to come first as 'result1',
    and the outer _extra_trees call to consume it as 'result2'.
    """
    tpot_obj = TPOT()

    pipeline_source = '_extra_trees(_select_percentile(input_df, 40), 32, 0.62, 0.45000000000000001)'
    expected_list = [
        ['result1', '_select_percentile', 'input_df', '40'],
        ['result2', '_extra_trees', 'result1', '32', '0.62', '0.45'],
    ]

    pipeline = creator.Individual.from_string(pipeline_source, tpot_obj._pset)
    pipeline_list = unroll_nested_fuction_calls(pipeline)

    assert expected_list == pipeline_list
示例#29
0
文件: tests.py 项目: ml-lab/tpot
def test_init():
    """Check that the TPOT constructor stores each argument on the instance.

    Every keyword passed to the constructor must be readable back as an
    attribute of the same name with the same value.
    """
    settings = (
        ('population_size', 500),
        ('generations', 1000),
        ('mutation_rate', 0.05),
        ('crossover_rate', 0.9),
        ('verbosity', 1),
    )

    tpot_obj = TPOT(**dict(settings))

    for attribute, expected_value in settings:
        assert getattr(tpot_obj, attribute) == expected_value
示例#30
0
def test_combine_dfs():
    """Check combine_dfs on two frames that share one column.

    Frames with columns (a, b) and (b, c) are expected to combine into a
    single frame with columns a, b and c.
    """
    tpot_obj = TPOT()

    col_a = range(10)
    col_b = range(10, 20)
    col_c = range(20, 30)

    left = pd.DataFrame({'a': col_a, 'b': col_b})
    right = pd.DataFrame({'b': col_b, 'c': col_c})
    expected = pd.DataFrame({'a': col_a, 'b': col_b, 'c': col_c})

    merged = tpot_obj.combine_dfs(left, right)
    assert merged.equals(expected)