Exemplo n.º 1
0
	def test_make_tpot_pmml_config(self):
		# Input TPOT configuration: one entry with a hyperparameter grid and
		# one entry without any hyperparameters.
		tpot_config = {
			"sklearn.kernel_approximation.RBFSampler" : {
				"gamma" : numpy.arange(0.0, 1.01, 0.05)
			},
			"sklearn.preprocessing.StandardScaler" : {}
		}
		actual = make_tpot_pmml_config(tpot_config)
		# Only the StandardScaler entry is expected to remain in the result.
		expected = {"sklearn.preprocessing.StandardScaler" : {}}
		self.assertEqual(expected, actual)
Exemplo n.º 2
0
def build_classifier(data, feature_pipeline, generations, population_size,
                     name):
    """Fit a TPOT classifier on transformed features and store the results.

    The feature pipeline is fitted on ``X``, TPOT searches for a classifier
    on the transformed (float-cast) features, and the feature steps plus the
    fitted TPOT steps are combined into one PMML pipeline. The pipeline is
    passed to ``store_pkl`` and the predictions (labels plus per-class
    probabilities) to ``store_csv``.

    Args:
        data: An ``(X, y)`` pair. ``X.columns`` supplies the active field
            names and ``y.name`` the target field name.
        feature_pipeline: Object exposing ``fit_transform`` and ``steps``.
        generations: Forwarded to ``TPOTClassifier(generations=...)``.
        population_size: Forwarded to ``TPOTClassifier(population_size=...)``.
        name: Identifier passed unchanged to ``store_pkl`` and ``store_csv``.
    """
    X, y = data
    Xt = feature_pipeline.fit_transform(X)
    Xt = Xt.astype(float)
    config = make_tpot_pmml_config(classifier_config_dict)
    config = filter_config(config)
    del config[
        "sklearn.naive_bayes.GaussianNB"]  # Does not support nesting - see http://mantis.dmg.org/view.php?id=208
    del config["sklearn.neighbors.KNeighborsClassifier"]
    del config[
        "sklearn.svm.LinearSVC"]  # Does not support classifier.predict_proba(Xt)
    del config["sklearn.tree.DecisionTreeClassifier"]
    classifier = TPOTClassifier(generations=generations,
                                population_size=population_size,
                                random_state=13,
                                config_dict=config,
                                verbosity=2)
    classifier.fit(Xt, y)
    pipeline = make_pmml_pipeline(Pipeline(steps=feature_pipeline.steps +
                                           classifier.fitted_pipeline_.steps),
                                  active_fields=X.columns.values,
                                  target_fields=[y.name])
    print(repr(pipeline))
    store_pkl(pipeline, name)
    result = DataFrame(classifier.predict(Xt), columns=[y.name])
    # predict_proba columns follow the fitted estimator's classes_ order.
    # Labelling them from pandas.unique(y) (order of first appearance) could
    # attach a probability column to the wrong class.
    class_labels = classifier.fitted_pipeline_.classes_
    probabilities = DataFrame(classifier.predict_proba(Xt),
                              columns=[
                                  "probability(" + str(label) + ")"
                                  for label in class_labels
                              ])
    result = pandas.concat([result, probabilities], axis=1)
    store_csv(result, name)
Exemplo n.º 3
0
def build_classifier(data, name):
    """Fit a TPOT classifier on ``data`` and store the pipeline and predictions.

    The fitted TPOT pipeline is wrapped into a PMML pipeline and passed to
    ``store_pkl`` as ``name + ".pkl"``; the predictions (labels plus per-class
    probabilities) are passed to ``store_csv`` as ``name + ".csv"``.

    Args:
        data: An ``(X, y)`` pair. ``X.columns`` supplies the active field
            names and ``y.name`` the target field name.
        name: Base name used to derive the ``.pkl`` and ``.csv`` names.
    """
    X, y = data
    config = make_tpot_pmml_config(classifier_config_dict)
    del config["sklearn.neighbors.KNeighborsClassifier"]
    classifier = TPOTClassifier(generations=1,
                                population_size=3,
                                random_state=13,
                                config_dict=config,
                                verbosity=2)
    classifier.fit(X, y)
    pipeline = make_pmml_pipeline(classifier.fitted_pipeline_,
                                  active_fields=X.columns.values,
                                  target_fields=[y.name])
    print(repr(pipeline))
    store_pkl(pipeline, name + ".pkl")
    result = DataFrame(classifier.predict(X), columns=[y.name])
    # predict_proba columns follow the fitted estimator's classes_ order.
    # Labelling them from pandas.unique(y) (order of first appearance) could
    # attach a probability column to the wrong class.
    class_labels = classifier.fitted_pipeline_.classes_
    probabilities = DataFrame(classifier.predict_proba(X),
                              columns=[
                                  "probability(" + str(label) + ")"
                                  for label in class_labels
                              ])
    result = pandas.concat([result, probabilities], axis=1)
    store_csv(result, name + ".csv")
Exemplo n.º 4
0
def build_regressor(data, name):
	"""Fit a TPOT regressor on ``data`` and store the pipeline and predictions.

	``data`` is an ``(X, y)`` pair; ``X.columns`` and ``y.name`` supply the
	field names of the PMML pipeline. ``name`` is passed unchanged to both
	``store_pkl`` and ``store_csv``.
	"""
	X, y = data
	tpot_config = make_tpot_pmml_config(regressor_config_dict)
	# The k-nearest neighbors regressor is removed from the search space.
	del tpot_config["sklearn.neighbors.KNeighborsRegressor"]
	regressor = TPOTRegressor(
		generations = 3,
		population_size = 3,
		random_state = 13,
		config_dict = tpot_config,
		verbosity = 2
	)
	regressor.fit(X, y)
	pmml_pipeline = make_pmml_pipeline(
		regressor.fitted_pipeline_,
		active_fields = X.columns.values,
		target_fields = [y.name]
	)
	print(repr(pmml_pipeline))
	store_pkl(pmml_pipeline, name)
	predictions = regressor.predict(X)
	store_csv(DataFrame(predictions, columns = [y.name]), name)
Exemplo n.º 5
0
def build_regressor(data, feature_pipeline, generations, population_size,
                    name):
    """Fit a TPOT regressor on transformed features and store the results.

    The feature pipeline is fitted on ``X`` and its output is cast to float
    before the TPOT search. The feature steps and the fitted TPOT steps are
    then joined into a single PMML pipeline, which is passed to ``store_pkl``;
    the predictions are passed to ``store_csv``.

    Args:
        data: An ``(X, y)`` pair. ``X.columns`` supplies the active field
            names and ``y.name`` the target field name.
        feature_pipeline: Object exposing ``fit_transform`` and ``steps``.
        generations: Forwarded to ``TPOTRegressor(generations=...)``.
        population_size: Forwarded to ``TPOTRegressor(population_size=...)``.
        name: Identifier passed unchanged to ``store_pkl`` and ``store_csv``.
    """
    X, y = data
    Xt = feature_pipeline.fit_transform(X).astype(float)

    tpot_config = filter_config(make_tpot_pmml_config(regressor_config_dict))
    # The k-nearest neighbors regressor is removed from the search space.
    del tpot_config["sklearn.neighbors.KNeighborsRegressor"]

    regressor = TPOTRegressor(
        generations=generations,
        population_size=population_size,
        random_state=13,
        config_dict=tpot_config,
        verbosity=2,
    )
    regressor.fit(Xt, y)

    # Feature-engineering steps first, followed by the steps TPOT selected.
    combined_steps = feature_pipeline.steps + regressor.fitted_pipeline_.steps
    pmml_pipeline = make_pmml_pipeline(
        Pipeline(steps=combined_steps),
        active_fields=X.columns.values,
        target_fields=[y.name],
    )
    print(repr(pmml_pipeline))
    store_pkl(pmml_pipeline, name)

    predictions = regressor.predict(Xt)
    store_csv(DataFrame(predictions, columns=[y.name]), name)