"""TPOT-exported pipeline: DatasetSelector -> MinMaxScaler -> ExtraTreesClassifier.

Loads a labeled dataset, splits it, fits the exported pipeline on the
training partition, and predicts on the held-out partition.
"""
import numpy as np
import pandas as pd
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import MinMaxScaler
from tpot.builtins import DatasetSelector

# NOTE: Make sure that the class is labeled 'target' in the data file
tpot_data = pd.read_csv('PATH/TO/DATA/FILE', sep='COLUMN_SEPARATOR', dtype=np.float64)
features = tpot_data.drop('target', axis=1).values
training_features, testing_features, training_target, testing_target = \
    train_test_split(features, tpot_data['target'].values, random_state=74)

# Average CV score on the training set was: 0.7005217391304347
exported_pipeline = make_pipeline(
    # Presumably selects the feature subset indexed by sel_subset from
    # the subset-definition file — confirm against tpot.builtins docs.
    DatasetSelector(sel_subset=4, subset_list="module23.csv"),
    MinMaxScaler(),
    ExtraTreesClassifier(bootstrap=True, criterion="gini", max_features=0.45,
                         min_samples_leaf=8, min_samples_split=8,
                         n_estimators=100),
)

exported_pipeline.fit(training_features, training_target)
results = exported_pipeline.predict(testing_features)
"""TPOT-exported pipeline: DatasetSelector -> RBFSampler -> GradientBoostingClassifier.

Loads a labeled dataset, splits it, fits the exported pipeline on the
training partition, and predicts on the held-out partition.
"""
import numpy as np
import pandas as pd
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.kernel_approximation import RBFSampler
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline
from tpot.builtins import DatasetSelector

# NOTE: Make sure that the class is labeled 'target' in the data file
tpot_data = pd.read_csv('PATH/TO/DATA/FILE', sep='COLUMN_SEPARATOR', dtype=np.float64)
features = tpot_data.drop('target', axis=1).values
training_features, testing_features, training_target, testing_target = \
    train_test_split(features, tpot_data['target'].values, random_state=98)

# Average CV score on the training set was: 0.693726362625139
exported_pipeline = make_pipeline(
    # Presumably selects the feature subset indexed by sel_subset from
    # the subset-definition file — confirm against tpot.builtins docs.
    DatasetSelector(sel_subset=14, subset_list="subsets.csv"),
    RBFSampler(gamma=0.65),
    GradientBoostingClassifier(learning_rate=0.5, max_depth=8,
                               max_features=0.6500000000000001,
                               min_samples_leaf=8, min_samples_split=3,
                               n_estimators=100,
                               subsample=0.8500000000000001),
)

exported_pipeline.fit(training_features, training_target)
results = exported_pipeline.predict(testing_features)
"""TPOT-exported pipeline: DatasetSelector -> MinMaxScaler -> KNeighborsClassifier.

Loads a labeled dataset, splits it, fits the exported pipeline on the
training partition, and predicts on the held-out partition.
"""
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import MinMaxScaler
from tpot.builtins import DatasetSelector

# NOTE: Make sure that the class is labeled 'target' in the data file
tpot_data = pd.read_csv('PATH/TO/DATA/FILE', sep='COLUMN_SEPARATOR', dtype=np.float64)
features = tpot_data.drop('target', axis=1).values
training_features, testing_features, training_target, testing_target = \
    train_test_split(features, tpot_data['target'].values, random_state=9)

# Average CV score on the training set was: 0.7172173913043478
exported_pipeline = make_pipeline(
    # Presumably selects the feature subset indexed by sel_subset from
    # the subset-definition file — confirm against tpot.builtins docs.
    DatasetSelector(sel_subset=12, subset_list="module23.csv"),
    MinMaxScaler(),
    KNeighborsClassifier(n_neighbors=21, p=2, weights="uniform"),
)

exported_pipeline.fit(training_features, training_target)
results = exported_pipeline.predict(testing_features)
"""TPOT-exported pipeline: DatasetSelector -> MinMaxScaler -> RandomForestClassifier.

Loads a labeled dataset, splits it, fits the exported pipeline on the
training partition, and predicts on the held-out partition.
"""
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import MinMaxScaler
from tpot.builtins import DatasetSelector

# NOTE: Make sure that the class is labeled 'target' in the data file
tpot_data = pd.read_csv('PATH/TO/DATA/FILE', sep='COLUMN_SEPARATOR', dtype=np.float64)
features = tpot_data.drop('target', axis=1).values
training_features, testing_features, training_target, testing_target = \
    train_test_split(features, tpot_data['target'].values, random_state=17)

# Average CV score on the training set was: 0.6930515387467556
exported_pipeline = make_pipeline(
    # Presumably selects the feature subset indexed by sel_subset from
    # the subset-definition file — confirm against tpot.builtins docs.
    DatasetSelector(sel_subset=0, subset_list="subsets.csv"),
    MinMaxScaler(),
    RandomForestClassifier(bootstrap=False, criterion="gini", max_features=0.25,
                           min_samples_leaf=1, min_samples_split=8,
                           n_estimators=100),
)

exported_pipeline.fit(training_features, training_target)
results = exported_pipeline.predict(testing_features)
"""TPOT-exported pipeline: DatasetSelector -> MaxAbsScaler -> ExtraTreesClassifier.

Loads a labeled dataset, splits it, fits the exported pipeline on the
training partition, and predicts on the held-out partition.
"""
import numpy as np
import pandas as pd
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import MaxAbsScaler
from tpot.builtins import DatasetSelector

# NOTE: Make sure that the class is labeled 'target' in the data file
tpot_data = pd.read_csv('PATH/TO/DATA/FILE', sep='COLUMN_SEPARATOR', dtype=np.float64)
features = tpot_data.drop('target', axis=1).values
training_features, testing_features, training_target, testing_target = \
    train_test_split(features, tpot_data['target'].values, random_state=22)

# Average CV score on the training set was: 0.7259130434782608
exported_pipeline = make_pipeline(
    # Presumably selects the feature subset indexed by sel_subset from
    # the subset-definition file — confirm against tpot.builtins docs.
    DatasetSelector(sel_subset=4, subset_list="module23.csv"),
    MaxAbsScaler(),
    ExtraTreesClassifier(bootstrap=True, criterion="gini", max_features=0.4,
                         min_samples_leaf=4, min_samples_split=6,
                         n_estimators=100),
)

exported_pipeline.fit(training_features, training_target)
results = exported_pipeline.predict(testing_features)
"""TPOT-exported pipeline: DatasetSelector -> RobustScaler -> ExtraTreesClassifier.

Loads a labeled dataset, splits it, fits the exported pipeline on the
training partition, and predicts on the held-out partition.
"""
import numpy as np
import pandas as pd
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import RobustScaler
from tpot.builtins import DatasetSelector

# NOTE: Make sure that the class is labeled 'target' in the data file
tpot_data = pd.read_csv('PATH/TO/DATA/FILE', sep='COLUMN_SEPARATOR', dtype=np.float64)
features = tpot_data.drop('target', axis=1).values
training_features, testing_features, training_target, testing_target = \
    train_test_split(features, tpot_data['target'].values, random_state=48)

# Average CV score on the training set was: 0.6918260869565217
exported_pipeline = make_pipeline(
    # Presumably selects the feature subset indexed by sel_subset from
    # the subset-definition file — confirm against tpot.builtins docs.
    DatasetSelector(sel_subset=4, subset_list="module23.csv"),
    RobustScaler(),
    ExtraTreesClassifier(bootstrap=True, criterion="entropy",
                         max_features=0.9500000000000001,
                         min_samples_leaf=9, min_samples_split=14,
                         n_estimators=100),
)

exported_pipeline.fit(training_features, training_target)
results = exported_pipeline.predict(testing_features)
"""TPOT-exported pipeline: DatasetSelector -> ZeroCount -> ExtraTreesClassifier.

Loads a labeled dataset, splits it, fits the exported pipeline on the
training partition, and predicts on the held-out partition.
"""
import numpy as np
import pandas as pd
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline
from tpot.builtins import DatasetSelector, ZeroCount

# NOTE: Make sure that the class is labeled 'target' in the data file
tpot_data = pd.read_csv('PATH/TO/DATA/FILE', sep='COLUMN_SEPARATOR', dtype=np.float64)
features = tpot_data.drop('target', axis=1).values
training_features, testing_features, training_target, testing_target = \
    train_test_split(features, tpot_data['target'].values, random_state=96)

# Average CV score on the training set was: 0.7070745272525027
exported_pipeline = make_pipeline(
    # Presumably selects the feature subset indexed by sel_subset from
    # the subset-definition file — confirm against tpot.builtins docs.
    DatasetSelector(sel_subset=0, subset_list="subsets.csv"),
    ZeroCount(),
    ExtraTreesClassifier(bootstrap=False, criterion="gini", max_features=0.8,
                         min_samples_leaf=1, min_samples_split=8,
                         n_estimators=100),
)

exported_pipeline.fit(training_features, training_target)
results = exported_pipeline.predict(testing_features)
"""TPOT-exported pipeline: DatasetSelector -> StandardScaler -> GradientBoostingClassifier.

Loads a labeled dataset, splits it, fits the exported pipeline on the
training partition, and predicts on the held-out partition.
"""
import numpy as np
import pandas as pd
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from tpot.builtins import DatasetSelector

# NOTE: Make sure that the class is labeled 'target' in the data file
tpot_data = pd.read_csv('PATH/TO/DATA/FILE', sep='COLUMN_SEPARATOR', dtype=np.float64)
features = tpot_data.drop('target', axis=1).values
training_features, testing_features, training_target, testing_target = \
    train_test_split(features, tpot_data['target'].values, random_state=0)

# Average CV score on the training set was: 0.6939710789766408
exported_pipeline = make_pipeline(
    # Presumably selects the feature subset indexed by sel_subset from
    # the subset-definition file — confirm against tpot.builtins docs.
    DatasetSelector(sel_subset=0, subset_list="subsets.csv"),
    StandardScaler(),
    GradientBoostingClassifier(learning_rate=0.5, max_depth=4,
                               max_features=0.6500000000000001,
                               min_samples_leaf=7, min_samples_split=10,
                               n_estimators=100, subsample=1.0),
)

exported_pipeline.fit(training_features, training_target)
results = exported_pipeline.predict(testing_features)
"""TPOT-exported pipeline: DatasetSelector -> RobustScaler -> RandomForestClassifier.

Loads a labeled dataset, splits it, fits the exported pipeline on the
training partition, and predicts on the held-out partition.
"""
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import RobustScaler
from tpot.builtins import DatasetSelector

# NOTE: Make sure that the class is labeled 'target' in the data file
tpot_data = pd.read_csv('PATH/TO/DATA/FILE', sep='COLUMN_SEPARATOR', dtype=np.float64)
features = tpot_data.drop('target', axis=1).values
training_features, testing_features, training_target, testing_target = \
    train_test_split(features, tpot_data['target'].values, random_state=54)

# Average CV score on the training set was: 0.680808305524657
exported_pipeline = make_pipeline(
    # Presumably selects the feature subset indexed by sel_subset from
    # the subset-definition file — confirm against tpot.builtins docs.
    DatasetSelector(sel_subset=0, subset_list="subsets.csv"),
    RobustScaler(),
    RandomForestClassifier(bootstrap=False, criterion="gini", max_features=0.55,
                           min_samples_leaf=6, min_samples_split=14,
                           n_estimators=100),
)

exported_pipeline.fit(training_features, training_target)
results = exported_pipeline.predict(testing_features)