def test_fit():
    """Assert that the TPOT fit function provides an optimized pipeline."""
    # A tiny deterministic run (1 individual, 1 generation) is enough to
    # verify that fitting populates the optimized-pipeline attribute.
    clf = TPOT(random_state=42, population_size=1, generations=1, verbosity=0)
    clf.fit(training_features, training_classes)

    # After fitting, the best individual found by DEAP must be recorded
    # and the generation counter must have been reset to zero.
    assert isinstance(clf._optimized_pipeline, creator.Individual)
    assert clf.gp_generation == 0
# Split the GAMETES data set into the phenotype (target) column and the
# SNP feature columns, then hold out 25% of rows for scoring.
phenotype = load_gametes['Class']
individuals = load_gametes.drop('Class', axis=1)
X_train, X_test, y_train, y_test = train_test_split(
    individuals, phenotype, train_size=0.75, test_size=0.25)

# Expert Knowledge Filter & MDR
tpot = TPOT(generations=200, population_size=200, verbosity=2,
            expert_source=load_ekf)
start_time = time.time()
tpot.fit(X_train, y_train)
print(tpot.score(X_test, y_test))
end_time = time.time()
print("Time lapsed: ", end_time - start_time)

# MDR Only
#tpot = TPOT(generations=500, population_size=350, verbosity=2, expert_source=None)
#t1 = time.time()
#tpot.fit(X_train, y_train)
#print(tpot.score(X_test, y_test))
#t2 = time.time()
#print("Time lapsed: ", t2 - t1)

# Random Forest
#clf = RandomForestClassifier(max_depth=5, max_features=len(X_train.columns),
#                             n_estimators=1000)
from tpot import TPOT
from sklearn.cross_validation import train_test_split
import pandas as pd
# BUG FIX: was "import numpy as numpy", but the module is used as "np" below,
# which raised NameError at np.random.permutation.
import numpy as np

telescope = pd.read_csv("MAGIC Gamma Telescope Data.csv")

# Clean the data: shuffle the rows and reset the index so the split below
# is not biased by any ordering in the source file.
telescope_shuffle = telescope.iloc[np.random.permutation(len(telescope))]
tele = telescope_shuffle.reset_index(drop=True)

# Store classes: map the gamma/hadron labels to 0/1.
tele['Class'] = tele['Class'].map({'g': 0, 'h': 1})
tele_class = tele['Class'].values

# Split the row indices into training and validation sets, stratified on the
# class label. (The original chained assignment also bound an unused
# testing_indices name; a single assignment is equivalent.)
training_indices, validation_indices = train_test_split(
    tele.index, stratify=tele_class, train_size=0.75, test_size=0.25)

# Find the best model. Verbosity 2 shows a progress bar.
tpot = TPOT(generations=5, verbosity=2)
tpot.fit(tele.drop('Class', axis=1).loc[training_indices].values,
         tele.loc[training_indices, "Class"].values)

# Score the accuracy on the held-out rows.
tpot.score(tele.drop('Class', axis=1).loc[validation_indices].values,
           tele.loc[validation_indices, 'Class'].values)

# Export generated code
tpot.export('pipeline.py')
'z': 2, 'rx': 3, 'ry': 4, 'rz': 5 }) data_class = data['Class'].values # split training, testing, and validation data training_indices, validation_indices = training_indices, testing_indices = train_test_split( data.index, stratify=data_class, train_size=0.75, test_size=0.25) # Let Genetic Programming to find the best ML model and hyperparamters # Verbosity 2 shows a loading bar tpot = TPOT(generations=5, verbosity=2) tpot.fit( data.drop('Class', axis=1).loc[training_indices].values, data.loc[training_indices, 'Class'].values) # Score the accuracy tpot.score( data.drop('Class', axis=1).loc[validation_indices].values, data.loc[validation_indices, 'Class'].values) # export the generated code tpot.export('pipeline.py') ## compute sigmoid nonlinearity ## normalizes numbers given #def sigmoid(x): # output = 1/(1+np.exp(-x)) # return output
from tpot import TPOT
from sklearn.datasets import load_iris
from sklearn.cross_validation import train_test_split

# Load the iris data set and hold out 25% of it for validation.
iris = load_iris()
X_train, X_test, y_train, y_test = train_test_split(
    iris.data, iris.target, train_size=0.75, test_size=0.25)

# Evolve a pipeline for 5 generations (verbosity 2 shows a progress bar),
# report the held-out accuracy, and export the winning pipeline as code.
tpot = TPOT(generations=5, verbosity=2)
tpot.fit(X_train, y_train)
print(tpot.score(X_test, y_test))
tpot.export('tpot_iris_pipeline.py')
# Thin the Expedia training frame: drop unused columns and keep every 300th
# row of the first 3,000,000 to fit in memory.
train = train.drop(drop_list, axis=1)
train = train[0:3000000:300]
train.info(memory_usage='deep')

# Separate features from the "hotel_cluster" target, then free the frame.
X = train.drop("hotel_cluster", axis=1).values
y = train.loc[:, "hotel_cluster"].values
del train
import gc
gc.collect()

X_train, X_test, y_train, y_test = train_test_split(
    X, y, train_size=0.75, test_size=0.25)
print("got here!")

my_tpot = TPOT(generations=20, verbosity=2, population_size=5)
# seems to have a problem with pop <5
# gen 1-> really means two generations!

# BUG FIX: time.clock() was deprecated since Python 3.3 and removed in 3.8,
# and its meaning was platform-dependent; perf_counter() is the portable
# monotonic timer for measuring elapsed time.
start = time.perf_counter()
print(start)
my_tpot.fit(X_train, y_train)
my_tpot.export('tpot_expedia_pipeline.py')
end = time.perf_counter()
duration = end - start
score = my_tpot.score(X_test, y_test)
print(duration, score)