def test_export():
    """Assert that TPOT's export function throws a ValueError when no optimized pipeline exists"""
    # A freshly constructed TPOT object has never been fitted.
    tpot_obj = TPOT()

    try:
        tpot_obj.export("test_export.py")
    except ValueError:
        # Expected outcome: export() refused to run.
        pass
    else:
        # Raise explicitly rather than `assert False`: assert statements are
        # removed under `python -O`, which would let this test pass vacuously.
        raise AssertionError("export() did not raise ValueError")
def test_export():
    """Assert that TPOT's export function throws a ValueError when no optimized pipeline exists"""
    # No fit() call happens before export(), so export() is expected to fail.
    tpot_obj = TPOT()

    try:
        tpot_obj.export("test_export.py")
    except ValueError:
        # This is the behavior under test.
        pass
    else:
        # An explicit raise survives `python -O`; a bare `assert False`
        # would be compiled away and the test could never fail.
        raise AssertionError("export() did not raise ValueError")
def test_export():
    """Ensure that the TPOT export function raises a ValueError when no optimized pipeline exists"""
    # The object is exported immediately after construction, without fitting.
    tpot_obj = TPOT()

    try:
        tpot_obj.export('will_not_output')
    except ValueError:
        # Expected outcome: export() refused to run.
        pass
    else:
        # Raise explicitly rather than `assert False`: assert statements are
        # removed under `python -O`, which would let this test pass vacuously.
        raise AssertionError("export() did not raise ValueError")
def test_export():
    """Ensure that the TPOT export function raises a ValueError when no optimized pipeline exists"""
    # No fit() call happens before export(), so export() is expected to fail.
    tpot_obj = TPOT()

    try:
        tpot_obj.export('will_not_output')
    except ValueError:
        # This is the behavior under test.
        pass
    else:
        # An explicit raise survives `python -O`; a bare `assert False`
        # would be compiled away and the test could never fail.
        raise AssertionError("export() did not raise ValueError")
from tpot import TPOT
try:
    # scikit-learn >= 0.18 provides train_test_split here.
    from sklearn.model_selection import train_test_split
except ImportError:
    # Older releases only ship the module that was removed in 0.20.
    from sklearn.cross_validation import train_test_split
import pandas as pd
import numpy as numpy

telescope = pd.read_csv("MAGIC Gamma Telescope Data.csv")

# Clean the data: shuffle the rows, then renumber the index from zero.
# NumPy is bound as `numpy` above (there is no `np` alias), so it has to be
# referenced by that name; `np.random...` raised NameError.
telescope_shuffle = telescope.iloc[numpy.random.permutation(len(telescope))]
tele = telescope_shuffle.reset_index(drop=True)

# Store classes: map the 'g'/'h' labels to 0/1.
tele['Class'] = tele['Class'].map({'g': 0, 'h': 1})
tele_class = tele['Class'].values

# Split data: a stratified 75/25 split of the row index labels.
training_indices, validation_indices = train_test_split(
    tele.index, stratify=tele_class, train_size=0.75, test_size=0.25)
# The original chained assignment also bound this name to the held-out part.
testing_indices = validation_indices

# Find best model.
tpot = TPOT(generations=5, verbosity=2)
tpot.fit(tele.drop('Class', axis=1).loc[training_indices].values,
         tele.loc[training_indices, "Class"].values)

# Score the accuracy.
# NOTE: the return value is discarded, so nothing is shown when this runs as
# a plain script; wrap the call in print() to display it.
tpot.score(tele.drop('Class', axis=1).loc[validation_indices].values,
           tele.loc[validation_indices, 'Class'].values)

# Export generated code.
tpot.export('pipeline.py')
# Column-name -> Series view of the SNP/gene table.
train_test_data = pd.DataFrame.to_dict(load_sen_gene, 'series')
snps = train_test_data['#snp']
genes = train_test_data['gene']

# Build a nested mapping: for every SNP, one entry per cluster keyed by the
# first gene value of that cluster, holding 1.0 when the SNP is listed in
# the cluster's '#snp' column and 0.0 otherwise.
d_array = {}
for snp in snps:
    d_array[snp] = {}
    for i in range(len(clustered_genes)):
        d_array[snp][clustered_genes[i]['gene'].values[0]] = (
            1.0 if snp in clustered_genes[i]['#snp'].values else 0.0
        )

# Transpose so each SNP becomes a row.
df = pd.DataFrame(d_array).T

# Separate the label column from the remaining columns.
phenotype = load_bladder_610K['phenotype']
individuals = load_bladder_610K.drop('phenotype', axis=1)

# Fixed seed keeps the 75/25 split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    individuals,
    phenotype,
    train_size=0.75,
    test_size=0.25,
    random_state=42,
)

tpot = TPOT(generations=10, population_size=10, verbosity=2)
tpot.fit(X_train, y_train)
print(tpot.score(X_test, y_test))
tpot.export('tpot_sen_gene_pipeline_b610k.py')
from tpot import TPOT
from sklearn.datasets import load_iris
try:
    # scikit-learn >= 0.18 provides train_test_split here.
    from sklearn.model_selection import train_test_split
except ImportError:
    # Older releases only ship the module that was removed in 0.20.
    from sklearn.cross_validation import train_test_split

# Hold out a quarter of the iris samples for scoring.
iris = load_iris()
X_train, X_test, y_train, y_test = train_test_split(
    iris.data, iris.target, train_size=0.75, test_size=0.25)

# Fit on the training part, print the score on the held-out part, then
# export to a Python file.
tpot = TPOT(generations=5, verbosity=2)
tpot.fit(X_train, y_train)
print(tpot.score(X_test, y_test))
tpot.export('tpot_iris_pipeline.py')
"""
Testing TPOT [Tree-based Pipeline Optimization Tool] built by Randy Olson (http://www.randalolson.com/2015/11/15/introducing-tpot-the-data-science-assistant/)
"""
from tpot import TPOT
import sys
import numpy as np  # np.loadtxt is used below but numpy was never imported
import pandas as pd
from sklearn.datasets import load_digits
try:
    # scikit-learn >= 0.18 provides train_test_split here.
    from sklearn.model_selection import train_test_split
except ImportError:
    # Older releases only ship the module that was removed in 0.20.
    from sklearn.cross_validation import train_test_split

# Command-line parsing: options come as "-flag value" pairs, so step by two.
DF = None
for i in range(1, len(sys.argv), 2):
    # Guard i + 1 so a trailing "-df" with no value cannot raise IndexError.
    if sys.argv[i] == "-df" and i + 1 < len(sys.argv):
        DF = sys.argv[i + 1]

if DF is None:
    # Previously this fell through to a NameError on the unbound name.
    sys.exit("usage: %s -df <data file>" % sys.argv[0])

# Skip the header row and read columns 1..270 of the file.
df = np.loadtxt(DF, skiprows=1, usecols=range(1, 271))
#df = pd.read_csv(DF, sep='\t',header=0, index_col=0)
# A NumPy array has no .info() method (that belongs to the pandas variant
# commented out above), so report the array's dimensions instead.
print(df.shape)

# First loaded column is the target; the remaining columns are the features.
y = df[:, 0]
x = df[:, 1:]

X_train, X_test, y_train, y_test = train_test_split(x, y, train_size=0.75)

tpot = TPOT(generations=5, verbosity=2)
tpot.fit(X_train, y_train)
# NOTE(review): score() is called with both the training and the testing data
# here; confirm this four-argument form matches the installed TPOT version.
print(tpot.score(X_train, y_train, X_test, y_test))
tpot.export('tpot_NNU_k3_pro_p05_pipeline.py')
# Convert the 'X' and 'Y' entries of `data` to arrays, in that order.
X, Y = (numpy.array(data[key]) for key in ('X', 'Y'))

# 75/25 train/test split.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    Y,
    train_size=0.75,
    test_size=0.25,
)
# Bare expression left over from a notebook cell; it produces no output when
# this runs as a script.
X_train.shape, X_test.shape, y_train.shape, y_test.shape

# In[ ]:

tpot = TPOT(generations=1, verbosity=2)
tpot.fit(X_train, y_train)
print(tpot.score(X_test, y_test))

# In[ ]:

tpot.export('sentiment_pipeline2.py')

# The string below is an inert expression statement (disabled code); it is
# never executed.
""" with open("/media/Data2/workspace/projects/kalamacom/sentiment_model_selection.py") as f: code = compile(f.read(), "/media/Data2/workspace/projects/kalamacom/sentiment_model_selection.py", 'exec') exec(code) """
# Drop the unwanted columns, then keep every 300th row of the first 3,000,000.
train = train.drop(drop_list, axis=1)
train = train[0:3000000:300]
train.info(memory_usage='deep')

# Features are every column except "hotel_cluster"; that column is the target.
X = train.drop("hotel_cluster", axis=1).values
y = train.loc[:, "hotel_cluster"].values

# Release the DataFrame before training; only the extracted arrays are needed.
del train
import gc
gc.collect()

X_train, X_test, y_train, y_test = train_test_split(
    X, y, train_size=0.75, test_size=0.25)
print("got here!")

# seems to have a problem with pop <5
# gen 1-> really means two generations!
my_tpot = TPOT(generations=20, verbosity=2, population_size=5)

# time.clock() was removed in Python 3.8; prefer perf_counter() and fall back
# to clock() only on interpreters that lack it.
_now = time.perf_counter if hasattr(time, "perf_counter") else time.clock

start = _now()
print(start)
my_tpot.fit(X_train, y_train)
# The export happens inside the timed region, so `duration` covers fit + export.
my_tpot.export('tpot_expedia_pipeline.py')
end = _now()
duration = end - start

score = my_tpot.score(X_test, y_test)
print(duration, score)