# Fit a TPOT-exported stacking pipeline on the first 614 rows of the prepared
# dataset and print one "id,prediction" line per test sample.
from sklearn.ensemble import ExtraTreesClassifier, RandomForestClassifier
from sklearn.naive_bayes import BernoulliNB, GaussianNB
from sklearn.pipeline import make_pipeline
from sklearn.tree import DecisionTreeClassifier
from tpot.builtins import StackingEstimator

from tools import load_test, prepare_dataset

# The helper's result is unpacked as (y, x); y is later passed to fit() as the
# target and x as the feature matrix.
y, x = prepare_dataset()

# Rows [0, 614) are used for fitting. The remaining rows are kept as a
# hold-out split, which nothing in this script consumes.
x_train, x_valid = x[:614], x[614:]
y_train = y[:614].reshape(-1)
y_valid = y[614:].reshape(-1)

# Average CV score on the training set was:0.8469063987308303
# Each of these estimators is wrapped in a StackingEstimator, in this order,
# ahead of the final GaussianNB step.
stacked_layers = [
    DecisionTreeClassifier(
        criterion="gini",
        max_depth=4,
        min_samples_leaf=15,
        min_samples_split=11,
    ),
    BernoulliNB(alpha=0.01, fit_prior=False),
    RandomForestClassifier(
        bootstrap=False,
        criterion="entropy",
        max_features=0.45,
        min_samples_leaf=3,
        min_samples_split=9,
        n_estimators=100,
    ),
    ExtraTreesClassifier(
        bootstrap=False,
        criterion="gini",
        max_features=0.9000000000000001,
        min_samples_leaf=12,
        min_samples_split=16,
        n_estimators=100,
    ),
]
exported_pipeline = make_pipeline(
    *(StackingEstimator(estimator=layer) for layer in stacked_layers),
    GaussianNB(),
)

exported_pipeline.fit(x_train, y_train)

# From here on `x` holds the test features returned by load_test().
ids, x = load_test()
predictions = exported_pipeline.predict(x)

# Emit "id,label"; the id is the first element of each entry in `ids`.
for index, id_number in enumerate(ids):
    print(f"{int(id_number[0])},{int(predictions[index])}")
import analitics as a
import numpy as np

# NOTE(review): `t` and `conf` are used below but are not bound by the imports
# visible in this part of the file - confirm where they are defined.
if __name__ == '__main__':
    # Load the two recordings.
    # NOTE(review): the 'timeout' set is read from 'r2/normal/' as well -
    # confirm that directory is intended.
    normal = t.Data(conf.path, conf.data_set_type)
    normal.add_dataset_from_file('r2/normal/', 'normal', 'csv')

    timeout = t.Data(conf.path, conf.data_set_type)
    timeout.add_dataset_from_file('r2/normal/', 'timeout', 'csv')

    # Values passed to every t.prepare_dataset() call below.
    ds_sizes = 5000
    ds_pos = 0

    # Series are assembled in this order: normal first, then timeout.
    recordings = (normal, timeout)

    # np.append without an axis flattens its inputs, so each result is a
    # single 1-D array holding the prepared 'Power' data of both recordings.
    ds_power = []
    for recording in recordings:
        ds_power = np.append(
            ds_power,
            t.prepare_dataset(recording.ods['Power'], ds_pos, ds_sizes, 'cut'))

    # Same construction for the 'Temp' column, prepared in 'diff' mode.
    ds_temperature = []
    for recording in recordings:
        ds_temperature = np.append(
            ds_temperature,
            t.prepare_dataset(recording.ods['Temp'], ds_pos, ds_sizes, 'diff'))

    # Disabled plotting calls:
    # t.plot_series(ds_power, 0, len(ds_power))
    # t.plot_series(ds_temperature, 0, len(ds_temperature))
    #