dataset1 = np.loadtxt("pima-indians-diabetes.csv", delimiter=',')
inputList = dataset1[:, 0:8]
resultList = dataset1[:, 8]


def create_default_model(optimizer='adam', init='uniform'):
    model = Sequential()
    model.add(Dense(12, input_dim=8, kernel_initializer=init, activation='relu'))
    model.add(Dense(8, activation='relu'))
    model.add(Dense(1, activation='sigmoid'))
    model.compile(loss='binary_crossentropy', optimizer=optimizer,
                  metrics=['accuracy'])
    return model


model = KerasClassifier(build_fn=create_default_model, verbose=0)
optimizers = ['rmsprop', 'adam']
inits = ['normal', 'uniform']
epochs = [50, 100, 150]
batches = [5, 10, 15]
param_grid = dict(optimizer=optimizers, epochs=epochs, batch_size=batches,
                  init=inits)
grid = GridSearchCV(estimator=model, param_grid=param_grid)
grid_result = grid.fit(inputList, resultList)
print("test score, param:", grid_result.best_score_, grid_result.best_params_)
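# A minimal follow-up sketch (assumption: run right after the grid search
# above) that lists the mean score of every tested parameter combination,
# not just the best one.
for mean, params in zip(grid_result.cv_results_['mean_test_score'],
                        grid_result.cv_results_['params']):
    print("%f with: %r" % (mean, params))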
# Define the neural-network baseline model
def baseline_model():
    # create model
    model = Sequential()
    model.add(Dense(4, input_dim=4, kernel_initializer='normal',
                    activation='relu'))
    # softmax pairs with categorical_crossentropy for the 3-class output
    model.add(Dense(3, kernel_initializer='normal', activation='softmax'))
    # Compile model
    model.compile(loss='categorical_crossentropy', optimizer='adam',
                  metrics=['accuracy'])
    return model


# Evaluate the model with k-fold cross-validation
estimator = KerasClassifier(build_fn=baseline_model, epochs=200, batch_size=5,
                            verbose=0)
kfold = KFold(n_splits=10, shuffle=True, random_state=seed)
results = cross_val_score(estimator, X_train, Y_train, cv=kfold)
print("Baseline: %.2f%% (%.2f%%)" % (results.mean() * 100, results.std() * 100))

# Make predictions
estimator.fit(X_train, Y_train)
print("Accuracy: {}%\n".format(estimator.score(X_test, Y_test) * 100))
predictions = estimator.predict(X_test)
print(predictions)
print(encoder.inverse_transform(predictions))
def build_classifier(optimizer):
    classifier = Sequential()
    classifier.add(Dense(6, kernel_initializer='uniform', activation='relu',
                         input_shape=(11, )))
    classifier.add(Dense(6, kernel_initializer='uniform', activation='relu'))
    classifier.add(Dense(1, kernel_initializer='uniform', activation='sigmoid'))
    classifier.compile(optimizer=optimizer, loss='binary_crossentropy',
                       metrics=['accuracy'])
    return classifier


classifier = KerasClassifier(build_fn=build_classifier)
parameters = {
    'batch_size': [25, 32],
    'epochs': [100, 500],
    'optimizer': ['adam', 'rmsprop']
}
grid_search = GridSearchCV(estimator=classifier, param_grid=parameters,
                           scoring='accuracy', cv=10)
grid_search = grid_search.fit(X_train, y_train)
best_parameters = grid_search.best_params_
best_accuracy = grid_search.best_score_
from keras.models import Sequential
from keras.layers import Dense, Flatten, Embedding, LeakyReLU, PReLU, ELU, BatchNormalization
from keras.layers import Dropout
from keras.activations import relu, sigmoid
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import GridSearchCV


def create_model(activation, layers):
    classifier = Sequential()
    for i, nodes in enumerate(layers):
        if i == 0:
            # 'nodes', not 'layers': each layer gets its own unit count
            classifier.add(Dense(units=nodes, kernel_initializer='he_uniform',
                                 activation=activation,
                                 input_dim=X_train.shape[1]))
        else:
            classifier.add(Dense(units=nodes, kernel_initializer='he_uniform',
                                 activation=activation))
    classifier.add(Dense(units=1, kernel_initializer='glorot_uniform',
                         activation='sigmoid'))
    classifier.compile(optimizer='adam', loss='binary_crossentropy',
                       metrics=['accuracy'])
    return classifier


classifier = KerasClassifier(build_fn=create_model, verbose=0)
layers = [[20], [40, 20], [45, 30, 15]]
activations = ['sigmoid', 'relu']
param_grid = dict(layers=layers, activation=activations,
                  batch_size=[128, 256], epochs=[30])
grid = GridSearchCV(estimator=classifier, param_grid=param_grid, cv=5)
grid_result = grid.fit(X_train, y_train)
print(grid_result.best_score_, grid_result.best_params_)
"A lot of good things are happening. We are respected again throughout the world, and that's a great thing" ) max_fatures = 2000 tokenizer = Tokenizer(num_words=max_fatures, split=' ') tokenizer.fit_on_texts(data) X = tokenizer.texts_to_sequences(data) X = pad_sequences(X, maxlen=28) print("Prediction: ", np.argmax(model.predict(X))) print("Actual: ", np.argmax(Y_test[0])) print() print() '''PART2 Apply GridSearchCV on the source code provided in the class''' model = KerasClassifier(build_fn=createmodel, verbose=1) batch_size = [10, 20] epochs = [1, 2] param_grid = dict(batch_size=batch_size, epochs=epochs) grid = GridSearchCV(estimator=model, param_grid=param_grid) grid_result = grid.fit(X_train, Y_train) print() print("GridSearchCV") print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_)) print() print() '''PART 3 Apply the code on spam data set available in the sourcecode (text classification on the spam.csv data set)''' data = pd.read_csv('spam.csv', encoding='ISO-8859-1') # Keeping only the neccessary columns
    model.add(Dense(40, input_dim=28, activation='relu'))
    model.add(Dense(32, activation='relu'))
    model.add(Dense(24, activation='relu'))
    model.add(Dense(17, activation='relu'))
    model.add(Dense(15, activation='relu'))
    model.add(Dense(14, activation='relu'))
    model.add(Dense(13, activation='relu'))
    model.add(Dense(10, activation='relu'))
    model.add(Dense(9, activation='relu'))
    model.add(Dense(3, activation='softmax'))
    model.compile(loss='categorical_crossentropy', optimizer='adam',
                  metrics=['accuracy'])
    return model


estimator = KerasClassifier(build_fn=base_model, epochs=70, batch_size=18,
                            verbose=0)
estimator.fit(train_x, dummy_y)
kfold = KFold(n_splits=10, shuffle=True, random_state=seed)
results = cross_val_score(estimator, train_x, dummy_y, cv=kfold)
print("Baseline: %.2f%% (%.2f%%)" % (results.mean() * 100, results.std() * 100))

# Option 1: Save weights + architecture
estimator.model.save('cat_model2.h5')
with open('model_architecture.json', 'w') as f:
    f.write(estimator.model.to_json())
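# A minimal sketch (not from the original) of reloading what Option 1 saved:
# load_model restores weights + architecture from the .h5 file in one call,
# while model_from_json rebuilds the architecture alone from the JSON dump.
from keras.models import load_model, model_from_json

restored = load_model('cat_model2.h5')
with open('model_architecture.json') as f:
    architecture_only = model_from_json(f.read())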
def create_hyperparameters():
    # Candidate options to sample from at random
    # (note: dropout is defined here but not included in the returned grid).
    batches = [10, 20, 30, 40, 50, 100, 120]
    optimizers = ['rmsprop', 'adam', 'adadelta']  # choose to suit the task
    dropout = np.linspace(0.1, 0.5, 5)
    epochs = [100, 200, 300, 400, 500]
    # Keys carry the pipeline step name prefix ("kerasclassifier__") so the
    # search can route them through the make_pipeline step below.
    return {
        "kerasclassifier__batch_size": batches,
        "kerasclassifier__optimizer": optimizers,
        "kerasclassifier__epochs": epochs
    }


# Wrap the Keras model so it is compatible with scikit-learn
# (this lets scikit-learn's cross-validation be used with Keras).
from keras.wrappers.scikit_learn import KerasClassifier
# from keras.wrappers.scikit_learn import KerasRegressor

model = KerasClassifier(build_fn=build_network, verbose=1)  # the model-building function defined above
hyperparameters = create_hyperparameters()  # candidate values for batch_size, optimizer and epochs

from sklearn.pipeline import make_pipeline
from sklearn.pipeline import Pipeline

pipe = make_pipeline(MinMaxScaler(), model)

from sklearn.model_selection import RandomizedSearchCV

search = RandomizedSearchCV(estimator=pipe,
                            param_distributions=hyperparameters,
                            n_iter=10, n_jobs=1, cv=3)
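# A minimal usage sketch (assumption: x_train/y_train exist in the
# surrounding script) showing how the randomized search would be run.
search.fit(x_train, y_train)
print(search.best_params_)
print(search.best_score_)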
matrix1 = ileJedynek.drop(['id'], axis=1)  # ground-truth values
mb = confusion_matrix(matrix1.values, predictionarray)
# model accuracy (strictly, this ratio is the precision for class 0)
dokladnosc = mb[0, 0] / (mb[0, 0] + mb[1, 0])
print("Model accuracy: " + str(dokladnosc * 100) + "%")
# print(np.argmax(predictions[0]))  # checks the prediction for the first (patient) row of the dane table

# MODEL ANALYSIS
from sklearn.model_selection import GridSearchCV
from keras.wrappers.scikit_learn import KerasClassifier

model1 = KerasClassifier(build_fn=createModel, verbose=0)
# use a helper subsample so the model doesn't take forever to fit
daneTemporalne = dane.sample(n=1000, random_state=1)
x_train1 = daneTemporalne.iloc[:, 0:11]
y_train1 = daneTemporalne.iloc[:, -1]
optimizers = ['adam', 'nadam']
activations = ['relu', 'softsign']
output_activations = ['softmax', 'sigmoid']
hidden_layers_number = [1, 2, 3, 4]
epochs = [10, 20, 30]
batches = [10, 20, 30]
param_grid = dict(optimizer=optimizers, epochs=epochs, batch_size=batches,
                  activation=activations,
                  output_activation=output_activations,
                  hidden_layers=hidden_layers_number)
def get_accuracies(data):
    X_train, X_test, y_train, y_test = get_balanced_data(data)
    seed = 1
    rfc = RandomForestClassifier(bootstrap=True, max_depth=10,
                                 max_features='auto', min_samples_leaf=2,
                                 min_samples_split=10, n_estimators=500)
    rfc2 = RandomForestClassifier(bootstrap=False, max_depth=2,
                                  max_features='auto', min_samples_leaf=5,
                                  min_samples_split=20, n_estimators=100)
    gbm = GradientBoostingClassifier(min_samples_split=25, min_samples_leaf=25,
                                     loss='deviance', learning_rate=0.1,
                                     max_depth=5, max_features='auto',
                                     criterion='friedman_mse',
                                     n_estimators=100)

    def baseline_model(optimizer='adam', learn_rate=0.01):
        model = Sequential()
        model.add(Dense(100, input_dim=X_train.shape[1], activation='relu'))
        model.add(Dense(50, activation='relu'))  # 50 hidden units in this layer
        model.add(Dense(2, activation='softmax'))
        model.compile(loss='categorical_crossentropy', optimizer=optimizer,
                      metrics=['accuracy'])
        return model

    keras = KerasClassifier(build_fn=baseline_model, batch_size=32, epochs=100,
                            verbose=0, optimizer='Adam')
    outer_cv = KFold(n_splits=5, shuffle=True, random_state=seed)
    svm = SVC(gamma="scale", probability=True, kernel='rbf', C=0.5)
    models = [('GBM', gbm), ('RFC', rfc), ('RFC2', rfc2), ('Keras', keras),
              ('SVM', svm)]
    results = []
    names = []
    scoring = 'accuracy'
    accuracy = []
    for name, model in models:
        cv_results = cross_val_score(model, X_train, y_train, cv=outer_cv,
                                     scoring=scoring)
        results.append(cv_results)
        names.append(name)
        # msg = "Cross-validation Accuracy %s: %f (+/- %f)" % (name, cv_results.mean() * 100, cv_results.std() * 100)
        # print(msg)
        model.fit(X_train, y_train)
        # print('Test set accuracy: {:.2f}'.format(model.score(X_test, y_test) * 100), '%')
        accuracy.append(model.score(X_test, y_test))
    return accuracy
    network.add(layers.Dense(units=len(selectedLanguage), activation='sigmoid'))

    # Compile neural network
    network.compile(loss='binary_crossentropy',  # cross-entropy
                    optimizer='rmsprop',         # root mean square propagation
                    metrics=['accuracy'])        # accuracy performance metric

    # Return compiled network
    return network


# Wrap Keras model so it can be used by scikit-learn
neural_network = KerasClassifier(build_fn=create_network, epochs=100,
                                 batch_size=100, verbose=0)

# Evaluate neural network using six-fold cross-validation
score = cross_val_score(neural_network, x_train, y_train, cv=6)
score = score.mean()

from sklearn.model_selection import cross_val_predict
from sklearn.metrics import confusion_matrix

# Compiling the ANN
filename = "C:\\Users\\Mustafa\\Desktop\\mc\\eightLanguageAlphabetCommonLetter.hdf5"
# classifier.load_weights(filename)

# Fitting the ANN to the Training set
neural_network.fit(x_train, y_train, batch_size=20, epochs=100)
    return df


# Check the confusion matrix, precision and recall
pretty_confusion_matrix(y_test, y_test_pred, labels=['Stay', 'Leave'])
print(classification_report(y_test, y_test_pred))


# Check if you still get the same results with 5-fold cross-validation on all the data
def build_logistic_regression_model():
    model = Sequential()
    model.add(Dense(1, input_dim=20, activation='sigmoid'))
    model.compile(Adam(lr=0.5), 'binary_crossentropy', metrics=['accuracy'])
    return model


model = KerasClassifier(build_fn=build_logistic_regression_model, epochs=10,
                        verbose=0)
cv = KFold(5, shuffle=True)
scores = cross_val_score(model, X, y, cv=cv)
print("The cross validation accuracy is {:0.4f} ± {:0.4f}".format(
    scores.mean(), scores.std()))
print(scores)
def create_model():
    # create model
    model = Sequential()
    model.add(Dropout(0.34, input_shape=(60, )))
    model.add(Dense(60, kernel_initializer='normal', activation='relu',
                    kernel_constraint=maxnorm(4)))
    model.add(Dense(600, kernel_initializer='normal', activation='relu',
                    kernel_constraint=maxnorm(4)))
    model.add(Dense(300, activation='relu', kernel_constraint=maxnorm(4)))
    model.add(Dense(1, kernel_initializer='normal', activation='sigmoid'))
    # Compile model
    sgd = SGD(lr=0.2, momentum=1, decay=0.0, nesterov=False)
    model.compile(loss='binary_crossentropy', optimizer=sgd,
                  metrics=['accuracy'])
    return model


numpy.random.seed(seed)
estimators = []
estimators.append(('standardize', StandardScaler()))
estimators.append(('mlp', KerasClassifier(build_fn=create_model, epochs=300,
                                          batch_size=8, verbose=0)))
pipeline = Pipeline(estimators)
kfold = StratifiedKFold(n_splits=2, shuffle=True, random_state=seed)
results = cross_val_score(pipeline, X, encoded_Y, cv=kfold)
print("Visible: %.2f%% (%.2f%%)" % (results.mean() * 100, results.std() * 100))
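# A minimal follow-up sketch (assumption, not part of the original):
# cross_val_score does not leave a fitted model behind, so the pipeline
# has to be fitted once more on the full data before it can predict.
pipeline.fit(X, encoded_Y)
predictions = pipeline.predict(X)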
        units=4, kernel_initializer='uniform'))
    classifier.add(Dropout(0.1))
    classifier.add(Dense(activation='relu', units=4,
                         kernel_initializer='uniform'))
    classifier.add(Dropout(0.1))
    classifier.add(Dense(activation='sigmoid', units=1,
                         kernel_initializer='uniform'))
    classifier.compile(optimizer='adam', loss='binary_crossentropy',
                       metrics=['accuracy'])
    return classifier


classifier = KerasClassifier(build_fn=build_function, batch_size=10,
                             epochs=100)
accuracy = cross_val_score(estimator=classifier, X=X_train, y=y_train, cv=10,
                           n_jobs=1)
mean = accuracy.mean()      # 0.7442094135243448
variance = accuracy.std()   # 0.04474760490264673

# Tuning the model using grid search
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import GridSearchCV
def my_model(neurons):
    model = Sequential()
    model.add(Dense(6, input_dim=8, activation='relu'))
    model.add(Dense(neurons, activation='relu'))
    model.add(Dense(1, activation='sigmoid'))
    optimizer = SGD(lr=0.2, momentum=0.02)
    model.compile(loss='binary_crossentropy', optimizer=optimizer,
                  metrics=['accuracy'])
    return model


# input data
data = numpy.loadtxt("pima-indians-diabetes.csv", delimiter=',')
x = data[:, :8]
y = data[:, 8]
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2,
                                                    random_state=7)
model = KerasClassifier(build_fn=my_model, epochs=2, batch_size=10)
grid_params = {'neurons': list(range(3, 30, 3))}
grid = GridSearchCV(estimator=model, param_grid=grid_params, n_jobs=1)
grid_result = grid.fit(x_train, y_train)
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))
                  optimizer='adam',
                  metrics=['accuracy'])
    return model


# Not sure this works perfectly with train_test_split
X = rows[2:-1, 1:-1]
Y = rows[2:-1, -1]
# print(rows)
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.1,
                                                    stratify=Y)

########## TEST ################################
estimators = []
estimators.append(('standardize', StandardScaler()))
estimators.append(('mlp', KerasClassifier(build_fn=create_baseline, epochs=50,
                                          batch_size=75, verbose=1)))
pipeline = Pipeline(estimators)
kfold = StratifiedKFold(n_splits=10, shuffle=True)
results = cross_val_score(pipeline, X_train, Y_train, cv=kfold)
print("Results: %.2f%% (%.2f%%)" % (results.mean() * 100, results.std() * 100))
# train_test_split()
# print(labels)
    for layer_size in dense_layer_sizes:
        model.add(Dense(layer_size))
        model.add(Activation('relu'))
    model.add(Dropout(0.5))
    model.add(Dense(nb_classes))
    model.add(Activation('softmax'))
    model.compile(loss='categorical_crossentropy', optimizer='adadelta',
                  metrics=['accuracy'])
    return model


dense_size_candidates = [[32], [64], [32, 32], [64, 64]]
my_classifier = KerasClassifier(make_model, batch_size=32)
validator = GridSearchCV(
    my_classifier,
    param_grid={
        'dense_layer_sizes': dense_size_candidates,
        # nb_epoch is available for tuning even when it is not
        # an argument to the model-building function
        'nb_epoch': [3, 6],
        'nb_filters': [8],
        'nb_conv': [3],
        'nb_pool': [2]
    },
    scoring='neg_log_loss',  # scikit-learn expects 'neg_log_loss', not 'log_loss'
    n_jobs=1)
validator.fit(X_train, y_train)
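# A short follow-up sketch (assumption: run after validator.fit above):
# report the winning parameters and pull the underlying Keras model
# out of the refitted best estimator.
print('The parameters of the best model are:')
print(validator.best_params_)
best_model = validator.best_estimator_.model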
def baseline_model():
    # create model
    model = Sequential()
    model.add(Dense(256, input_dim=len(X_train.columns), activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(512, activation='relu'))
    model.add(Dropout(0.5))
    # model.add(Dense(1024, activation='relu'))
    model.add(Dense(len(df[stg.PLAYER_COL].unique()), activation='softmax'))
    # Compile model
    model.compile(loss='categorical_crossentropy', optimizer='adam',
                  metrics=['accuracy'])
    return model


keras_model = KerasClassifier(build_fn=baseline_model, epochs=500,
                              batch_size=1024, verbose=1)
keras_model.fit(X_train, dummy_y_train)
logging.info('.. Done')

logging.info('Performance evaluation ..')
pred_classes = keras_model.predict(X_test)
pred_players = list(map(lambda x: mapping_class_player[x], pred_classes))
pa = PerformanceAnalyzer(y_true=y_test, y_pred=pred_players)
accuracy = pa.compute_classification_accuracy()
logging.info('Classification accuracy: {}'.format(accuracy))
logging.info('.. Done')

logging.info('End of script {}'.format(basename(__file__)))
heart_dt, cv_results = param_search(x_train_heart, y_train_heart,
                                    search_type='Grid',
                                    param_dist=param_dist,
                                    model=heart_dt,
                                    scoring='roc_auc')

print('====================================================================')
print('PARAM SEARCH REPORT - DECISION TREE - HEART')
param_search_report(cv_results, n_top=100,
                    save_as='heart_dt_param_report',
                    verbose=True, print_n_top=10)

pickle.dump(heart_dt, open("heart_dt.p", "wb"))

### NN Classifier ###

### abalone ###
abalone_nn = KerasClassifier(build_fn=create_nn_model, input_dim=10, verbose=0,
                             epochs=50, batch_size=100,
                             loss='binary_crossentropy')

param_dist = {'layer1_nodes': [5, 10, 20, 25],
              'layer2_nodes': [5, 10, 20, 25],
              'learn_rate': [0.001, 0.01, 0.1, 0.2, 0.3],
              'momentum': [0.0, 0.2, 0.4, 0.6, 0.8, 0.9]}

abalone_nn, cv_results = param_search(x_train_abalone, y_train_abalone,
                                      search_type='Random',
                                      param_dist=param_dist,
                                      iterations=100,
                                      model=abalone_nn,
                                      verbose=True,
                                      scoring='roc_auc')
                  optimizer='adam',
                  metrics=['accuracy'])
    return model


# Load the data --- (*2)
data = json.load(open("./newstext/data-mini.json"))
# data = json.load(open("./newstext/data.json"))
X = np.array(data["X"])  # data representing the texts
Y = np.array(data["Y"])  # category data
# Set the maximum number of words
max_words = len(X[0])

# Training --- (*3)
X_train, X_test, Y_train, Y_test = train_test_split(X, Y)
print(len(X_train), len(Y_train))
Y_train = np_utils.to_categorical(Y_train, nb_classes)
model = KerasClassifier(build_fn=build_model, epochs=nb_epoch,
                        batch_size=batch_size)
model.fit(X_train, Y_train)

# Prediction --- (*4)
y = model.predict(X_test)
ac_score = metrics.accuracy_score(Y_test, y)
cl_report = metrics.classification_report(Y_test, y)
print("Accuracy =", ac_score)
print("Report =\n", cl_report)
    classifier.add(Dense(units=6, activation='relu',
                         kernel_initializer='uniform'))
    classifier.add(Dense(units=1, activation='sigmoid',
                         kernel_initializer='uniform'))
    classifier.compile(optimizer='Adam', loss='binary_crossentropy',
                       metrics=['accuracy'])
    return classifier


# from keras.callbacks import ModelCheckpoint
# checkpoint = ModelCheckpoint("best_model.hdf5", monitor='loss', verbose=1,
#                              save_best_only=True, mode='auto', period=1)
# global classifier
classifier = KerasClassifier(build_fn=build_Classifier, batch_size=10,
                             epochs=20)
accuracies = cross_val_score(estimator=classifier, X=x_train, y=y_train,
                             cv=10, n_jobs=-1)

# model accuracy
mean = accuracies.mean()
variance = accuracies.std()
print("mean: ", round(mean, 5) * 100, "%", "\nvariance: ",
      round(variance, 5))
y = to_categorical(integer_encoded)
X_train, X_test, Y_train, Y_test = train_test_split(X, y, test_size=0.33,
                                                    random_state=42)
print(X_train.shape, Y_train.shape)
print(X_test.shape, Y_test.shape)

# batch_size = 32
# classifier = createmodel()
# model.fit(X_train, Y_train, epochs=5, batch_size=batch_size, verbose=2)
# model.save('sentiment.h5')
# score, acc = model.evaluate(X_test, Y_test, verbose=2, batch_size=batch_size)
# print(score)
# print(acc)

from keras import backend as K

# build_fn must be the model-building function itself, not a built model
model = KerasClassifier(build_fn=createmodel, verbose=0)
batch_size = [2, 16]
epochs = [2, 5]
param_grid = dict(batch_size=batch_size, epochs=epochs)
grid = GridSearchCV(estimator=model, param_grid=param_grid, n_jobs=-1,
                    scoring='accuracy')
K.clear_session()
# note: passing batch_size to fit() here would override the batch sizes being searched
grid_result = grid.fit(X_train, Y_train)
print("Best: %f using %s" % (grid_result.best_score_,
                             grid_result.best_params_))
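# A minimal sketch (assumption: GridSearchCV's default refit=True, so the
# best estimator has already been retrained on X_train) to check held-out
# performance of the winning configuration.
test_acc = grid_result.best_estimator_.score(X_test, Y_test)
print("Test accuracy: %f" % test_acc)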
                          100 if positive_predictions > 0 else 0)
        positive_scores.append(positive_score)
        print('Accuracy of Positive Predictions: ', '\n',
              "%.1f%%" % positive_score, '\n-----------------------\n')
    return model


# evaluate baseline model with standardized data set
estimators = []
estimators.append(('standardize', StandardScaler()))
class_weights = {0: 1, 1: 1}
estimators.append(('mlp',
                   KerasClassifier(build_fn=create_baseline,
                                   epochs=100,
                                   batch_size=10,
                                   # class_weight=class_weights,
                                   verbose=0)))
pipeline = Pipeline(estimators)
kfold = StratifiedKFold(n_splits=10, shuffle=True, random_state=seed)
results = cross_val_score(pipeline, X, Y, cv=kfold)
np.set_printoptions(precision=3, suppress=True)
print("Standardized (conventional): %.2f%% (%.2f%%)" %
      (results.mean() * 100, results.std() * 100))
print('\nAccuracy Metrics:', '\n-----------------------')
print('Accuracy Average of All Positive Predictions: %.2f%% Standard Deviation: (%.2f%%)'
      % (np.asarray(positive_scores).mean(), np.asarray(positive_scores).std()))
print("Baseline Accuracy of Random Prediction: %.2f%% " %
      ((total_positive_examples / total_examples) * 100))
    classifier = Sequential()
    classifier.add(Embedding(max_features, output_dim=256))
    classifier.add(LSTM(128))
    classifier.add(Dropout(0.5))
    classifier.add(Dense(1, activation='sigmoid'))
    classifier.compile(loss='binary_crossentropy',
                       optimizer='Adam',  # 'rmsprop'
                       metrics=['accuracy'])
    return classifier


# Now create a classifier object wrapping the builder function above
classifier = KerasClassifier(build_fn=classifier_builder, batch_size=1024,
                             epochs=1)  # 10
if os.access("lstm_model.h5", os.F_OK):
    classifier = load_model('lstm_model.h5')

hist = classifier.fit(X_train, y_train, batch_size=1024, epochs=runEpoch)
print(hist.history)

if os.access("lstm_model.h5", os.F_OK):
    print(classifier.summary())
    classifier.save('lstm_model.h5')
else:
    print(classifier.model.summary())
    classifier.model.save('lstm_model.h5')
# baseline
def create_baseline():
    # create model
    model = Sequential()
    model.add(Dense(60, input_dim=60, kernel_initializer='normal',
                    activation='relu'))
    model.add(Dense(30, kernel_initializer='normal', activation='relu'))
    model.add(Dense(1, kernel_initializer='normal', activation='sigmoid'))
    # Compile model
    sgd = SGD(lr=0.01, momentum=0.8, decay=0.0, nesterov=False)
    model.compile(loss='binary_crossentropy', optimizer=sgd,
                  metrics=['accuracy'])
    return model


numpy.random.seed(seed)
estimators = []
estimators.append(('standardize', StandardScaler()))
estimators.append(('mlp', KerasClassifier(build_fn=create_baseline, epochs=300,
                                          batch_size=16, verbose=0)))
pipeline = Pipeline(estimators)
kfold = StratifiedKFold(n_splits=10, shuffle=True, random_state=seed)
results = cross_val_score(pipeline, X, encoded_Y, cv=kfold)
print("Baseline: %.2f%% (%.2f%%)" % (results.mean() * 100, results.std() * 100))
# Set the random seed
seed = 7
np.random.seed(seed)


# Model-building function
def create_model(optimizer='adam', init='glorot_uniform'):
    # Build the model
    model = Sequential()
    model.add(Dense(units=4, activation='relu', input_dim=4,
                    kernel_initializer=init))
    model.add(Dense(units=6, activation='relu', kernel_initializer=init))
    model.add(Dense(units=3, activation='softmax', kernel_initializer=init))
    # Compile the model
    model.compile(loss='categorical_crossentropy', optimizer=optimizer,
                  metrics=['accuracy'])
    return model


model = KerasClassifier(build_fn=create_model, epochs=200, batch_size=5,
                        verbose=0)
kfold = KFold(n_splits=10, shuffle=True, random_state=seed)
results = cross_val_score(model, x, Y, cv=kfold)
print('Accuracy: %.2f%% (%.2f)' % (results.mean() * 100, results.std()))
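# A minimal sketch (assumption, not part of the original): cross_val_score
# only reports scores, so fit once more on all the data before predicting.
model.fit(x, Y)
print(model.predict(x))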
def make_model2():
    model = Sequential([
        Dense(200, input_dim=483, activation='relu'),
        Dropout(0.85),
        Dense(200, activation='relu'),
        Dense(70, activation='relu'),
        BatchNormalization(),
        Dense(1, activation='sigmoid'),
    ])
    # defined but unused: compile() below uses the 'adam' string instead
    rmsprop = RMSprop(lr=0.001, rho=0.9, epsilon=1e-08, decay=0.0)
    model.compile(optimizer='adam', loss='binary_crossentropy',
                  metrics=['accuracy'])
    return model


mk = KerasClassifier(make_model1)
m7 = Pipeline([('a', StandardScaler()), ('keras', mk)])

import catboost as cb

m8 = cb.CatBoostClassifier()  # performs well on xbest

from mymodels import OnegoStackingClassifier
from sklearn.linear_model import LogisticRegression

m9 = OnegoStackingClassifier([m1, m2, m3, m5, m6, m8], LogisticRegression(),
                             n=3)

from sklearn.feature_selection import SelectKBest, SelectFromModel, SelectFdr, SelectFpr, SelectFwe, RFECV
from sklearn.base import clone

m10 = Pipeline([('select', SelectFromModel(clone(m1), 'mean', False)),
    # Dropout to avoid overfitting
    model.add(Dropout(0.25))
    # Flatten the results to one dimension for passing into our final layer
    model.add(Flatten())
    # A hidden layer to learn with
    model.add(Dense(nodes, activation='relu'))
    # Another dropout
    model.add(Dropout(0.5))
    # Final categorization from 0-9 with softmax
    model.add(Dense(10, activation='softmax'))
    model.compile(loss='categorical_crossentropy', optimizer='adam',
                  metrics=['accuracy'])
    return model


# Grid search
from sklearn.model_selection import GridSearchCV
from keras.wrappers.scikit_learn import KerasClassifier

# Grid values
grid_values = {'nodes': [64, 128, 256, 512]}
model = KerasClassifier(build_fn=create_model, epochs=10, verbose=1)

# Run grid search
grid_model_hidden_acc = GridSearchCV(model, param_grid=grid_values)
grid_model_hidden_acc.fit(train_images_noisy, train_labels)
print("Best: %f using %s" % (grid_model_hidden_acc.best_score_,
                             grid_model_hidden_acc.best_params_))
# Part 4 - Evaluating, Improving and Tuning the ANN

# Evaluating the ANN
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import cross_val_score
from keras.models import Sequential
from keras.layers import Dense, Dropout


def build_classifier():
    classifier = Sequential()
    classifier.add(Dense(units=6, kernel_initializer='uniform',
                         activation='relu', input_dim=11))
    classifier.add(Dropout(rate=0.1))
    classifier.add(Dense(units=6, kernel_initializer='uniform',
                         activation='relu'))
    classifier.add(Dropout(rate=0.1))
    classifier.add(Dense(units=1, kernel_initializer='uniform',
                         activation='sigmoid'))
    classifier.compile(optimizer='adam', loss='binary_crossentropy',
                       metrics=['accuracy'])
    return classifier


classifier = KerasClassifier(build_fn=build_classifier, batch_size=10,
                             epochs=100)
accuracies = cross_val_score(estimator=classifier, X=X_train, y=y_train,
                             cv=10, n_jobs=1)
mean = accuracies.mean()
variance = accuracies.std()

# Improving the ANN
# Dropout

# Tuning the ANN
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import GridSearchCV
from keras.models import Sequential
from keras.layers import Dense


def build_classifier(optimizer):
    classifier = Sequential()
    classifier.add(Dense(units=6, kernel_initializer='uniform',
                         activation='relu', input_dim=11))
    x = Dense(128, activation=activation, name='hidden3')(x)
    x = Dropout(drop)(x)
    outputs = Dense(10, activation=activation, name='output')(x)
    model = Model(inputs=inputs, outputs=outputs)
    opt = optimizer(learning_rate=learning_rate)
    model.compile(optimizer=opt, loss='categorical_crossentropy',
                  metrics=['acc'])
    return model
    # The model-building function only goes as far as compile();
    # fit() is run by the grid/random search itself (because it handles cv).


# Wrap the model with KerasClassifier so it can be used with scikit-learn
# (the search then handles the model, the parameters and cv together).
# First component for the random search: the model.
from keras.wrappers.scikit_learn import KerasClassifier

model = KerasClassifier(build_fn=build_model, verbose=1)
# build_fn: a callable function or class instance


# 2-2. Parameter setup (as a function)
# The second argument to the random search, the parameters, is also a function.
def create_hyperparameters():
    batches = [100, 200, 300]
    optimizers = [RMSprop, Adam, Adadelta, SGD, Adagrad, Nadam]
    learning_rate = np.linspace(0.1, 1.0, 10).tolist()
    dropout = np.linspace(0.1, 0.5, 5).tolist()
    activation = [
        'tanh', 'relu', 'elu', "selu", "softmax", "sigmoid", LeakyReLU()
checkpoint = ModelCheckpoint(config.MODEL_PATH,
                             monitor='acc',
                             verbose=1,
                             save_best_only=True,
                             mode='max')

reduce_lr = ReduceLROnPlateau(monitor='acc',
                              factor=0.5,
                              patience=2,
                              verbose=1,
                              mode='max',
                              min_lr=0.00001)

callbacks_list = [checkpoint, reduce_lr]

cnn_clf = KerasClassifier(build_fn=cnn_model,
                          batch_size=config.BATCH_SIZE,
                          validation_split=0.1,  # must be a fraction in [0, 1), not a count
                          epochs=config.EPOCHS,
                          verbose=1,  # progress bar - required for CI job
                          callbacks=callbacks_list,
                          image_size=config.IMAGE_SIZE)

if __name__ == '__main__':
    model = cnn_model()
    model.summary()
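    # A minimal training sketch (assumption: 'load_dataset' and
    # 'config.DATA_DIR' are hypothetical; the real data loading lives
    # elsewhere in this package).
    X_train, y_train = load_dataset(config.DATA_DIR)
    cnn_clf.fit(X_train, y_train)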