def bonus_nn_param():
    """Sweep hidden-layer width and iteration budget for an MLP classifier.

    Every combination of ``max_iter`` (outer loop) and hidden-layer width
    (inner loop) is fitted on the training split and scored on the
    validation split.  Each network uses four hidden layers of equal width.

    Returns:
        numpy.ndarray: one row per combination, in loop order, with columns
        ``[hidden width, max_iter, validation score]``.
    """
    from sklearn.neural_network import MLPClassifier as nn

    real_test, real_train, real_valid, fake_test, fake_train, fake_valid = \
        get_test_train_valid()
    (train_set, train_set_label,
     test_set, test_set_label,
     valid_set, valid_set_label) = get_logistic_sets(
        real_test, real_train, real_valid, fake_test, fake_train, fake_valid)

    iteration_grid = np.arange(1, 1000, 50)
    hidden_list = np.arange(1, 25, 3)
    n_hidden = hidden_list.shape[0]
    performance = np.zeros((n_hidden * iteration_grid.shape[0], 3))

    # Fixed hyper-parameters; only layer width and max_iter vary below.
    network = nn(
        activation='relu',            # relu/logistic/tanh
        learning_rate_init=0.0001,    # float
        learning_rate='constant',     # adaptive/constant
    )

    for outer, iteration in enumerate(iteration_grid):
        for inner, hidden in enumerate(hidden_list):
            row = outer * n_hidden + inner
            network.hidden_layer_sizes = (hidden,) * 4
            network.max_iter = iteration
            # The sets are transposed before use -- presumably they are
            # stored as (features, samples); confirm against the loaders.
            network.fit(train_set.T, train_set_label.T)

            performance[row, 0] = hidden
            performance[row, 1] = iteration
            performance[row, 2] = network.score(valid_set.T, valid_set_label.T)
            print('done: #neuron=' + str(hidden) + 'iterations=' +
                  str(iteration) + 'valid=' + str(performance[row, 2]))
    return performance
def NeuralNetworkGridSearch(x_train_processed, y_train, param_grid):
    """Run a 3-fold, accuracy-scored grid search over a neural network.

    The base estimator is a logistic-activation network trained with
    momentum-free SGD, early stopping and a 20% internal validation split.

    Args:
        x_train_processed: training features passed straight to ``fit``.
        y_train: training labels; ``.values.ravel()`` is applied before
            fitting, so it must expose a ``.values`` array.
        param_grid: hyper-parameter grid handed to ``GridSearchCV``.

    Returns:
        The fitted ``GridSearchCV`` object.
    """
    estimator_settings = dict(
        activation='logistic',
        random_state=40,
        max_iter=1500,
        momentum=0,
        solver='sgd',
        early_stopping=True,
        validation_fraction=0.2,
    )
    base_network = nn(**estimator_settings)

    searcher = sk.model_selection.GridSearchCV(
        base_network, param_grid, cv=3, scoring='accuracy')

    flat_labels = y_train.values.ravel()
    searcher.fit(x_train_processed, flat_labels)

    # Report the winning configuration.
    print("\n***** Grid search outcomes for neural network")
    print("Training score: ", searcher.best_score_)
    print("Best hyper-parameters: ", searcher.best_params_)

    return searcher
# Directory holding the training CSV; defaults to the SM_CHANNEL_TRAIN
# environment variable (presumably set by the SageMaker training container).
parser.add_argument('--data-dir', type=str, default=os.environ['SM_CHANNEL_TRAIN'])

# Model parameters
# alpha is a float-valued regularisation strength for the network, so it is
# parsed as float: with type=int, fractional values such as "0.001" were
# rejected by argparse.  Integer strings like "1" still parse.
parser.add_argument('--alpha', type=float, default=1.0)
parser.add_argument('--max_iter', type=int, default=1000)

# args holds all passed-in arguments
args = parser.parse_args()

# Read in csv training file (no header row)
training_dir = args.data_dir
train_data = pd.read_csv(os.path.join(training_dir, "r_train.csv"),
                         header=None, names=None)

# Labels are in the first column, features in the remaining ones
train_y = train_data.iloc[:, 0]
train_x = train_data.iloc[:, 1:]

alpha = args.alpha
max_iter = args.max_iter

model = nn(alpha=alpha, max_iter=max_iter)
model = model.fit(train_x, train_y)

# Save the trained model
# NOTE(review): args.model_dir relies on a --model-dir argument registered
# outside this excerpt -- confirm it exists.
joblib.dump(model, os.path.join(args.model_dir, "model.joblib"))
def _line(p1, p2):
    """Return (A, B, C) such that A*x + B*y = C passes through p1 and p2."""
    A = p1[1] - p2[1]
    B = p2[0] - p1[0]
    C = p1[0] * p2[1] - p2[0] * p1[1]
    return A, B, -C


def _intersection(L1, L2):
    """Return the (x, y) crossing of two lines, or None if they are parallel."""
    D = L1[0] * L2[1] - L1[1] * L2[0]
    if D == 0:
        return None
    Dx = L1[2] * L2[1] - L1[1] * L2[2]
    Dy = L1[0] * L2[2] - L1[2] * L2[0]
    return Dx / D, Dy / D


def _fluid_gradient_line(features, predicted, fluid):
    """Fit pressure against depth for one predicted fluid; return its line."""
    samples = features[['depth', 'pressure']][predicted == fluid]
    reg = LR()
    reg.fit(samples['depth'].values.reshape(-1, 1), samples['pressure'])
    # Two points on the fitted trend (depth 0 and 10000) define the line.
    shallow, deep = 0.0, 10000.0
    p_shallow, p_deep = reg.predict(np.array([[shallow], [deep]]))
    return _line((shallow, p_shallow), (deep, p_deep))


def detect_contact(data, fluid_type):
    """Print the depth of each fluid contact found in ``data``.

    A neural-network classifier is fitted on the ``depth``, ``temp`` and
    ``pressure`` columns against the lower-cased ``fluid`` column and then
    used to label the same rows.  For each fluid involved, a pressure-vs-depth
    regression is fitted on the rows predicted as that fluid; a contact is
    the depth at which two of those fitted lines intersect.

    Args:
        data: table with ``depth``, ``temp``, ``pressure`` and ``fluid``
            columns (``fluid`` must support ``.str.lower()``).
        fluid_type: ``["gas", "oil", "water"]`` prints GOC and OWC;
            ``["oil", "water"]`` prints OWC; anything else is treated as a
            gas/water system and prints GWC.  Tuples are accepted as well.

    Returns:
        None.  Results are printed.
    """
    # Previously these were only assigned in the first branch, so the
    # oil/water and gas/water branches failed before doing any work.
    X = data[['depth', 'temp', 'pressure']]
    y = data['fluid'].str.lower()

    model = nn(hidden_layer_sizes=(10, 10, 10), max_iter=10000,
               random_state=300)
    model.fit(X, y)
    predicted = model.predict(X)

    if isinstance(fluid_type, (list, tuple)):
        fluids = list(fluid_type)
    else:
        fluids = fluid_type

    # (label, upper fluid, lower fluid) for every contact to report.
    if fluids == ["gas", "oil", "water"]:
        contacts = [("GOC", "gas", "oil"), ("OWC", "oil", "water")]
    elif fluids == ["oil", "water"]:
        contacts = [("OWC", "oil", "water")]
    else:
        contacts = [("GWC", "gas", "water")]

    # Fit every required trend once, before anything is printed.
    gradient_lines = {}
    for _, upper, lower in contacts:
        for fluid in (upper, lower):
            if fluid not in gradient_lines:
                gradient_lines[fluid] = _fluid_gradient_line(
                    X, predicted, fluid)

    for label, upper, lower in contacts:
        crossing = _intersection(gradient_lines[upper], gradient_lines[lower])
        if crossing is None:
            # Parallel trends never meet; report it instead of failing on
            # an attempt to index a non-result.
            print(label + ": ", "not found (pressure gradients are parallel)")
        else:
            print(label + ": ", crossing[0])
# NOTE(review): this first statement is the tail of a loop whose header lies
# outside this excerpt -- re-indent to match it when merging.
deleteIndex.append(i)

# Drop the collected rows (axis 0)
data = np.delete(data, deleteIndex, 0)
# remove the 4th column
data = np.delete(data, 3, 1)
# The builtin float replaces np.float, which was only an alias for it and
# no longer exists in current NumPy releases.
data = data.astype(float)
print(data.shape)

# shuffle all of these properties in the same way
data = shuffle(data)

# NOTE(review): 70% of the rows are passed as test_size, leaving 30% for
# training -- confirm this is the intended split.
testSize = int(len(data) * 0.70)
# The last column holds the flag (label); everything before it is a feature.
trainData, testData, trainFlags, testFlags = train_test_split(
    data[:, :-1], data[:, -1], test_size=testSize)

clfs = [svm.SVC(), nn(solver='lbfgs'), knn(n_neighbors=200)]
header = ["SVM Predictor", "Neural Net Predictor", "KNN Predictor"]

for index, clf in enumerate(clfs):
    print(header[index])
    # Time fitting and prediction together
    start = time.time()
    clf.fit(trainData, trainFlags)
    predicted = clf.predict(testData)
    print("\tTime: %s" % (time.time() - start))

    labels = ['safe', 'mal']
    labeledPredicted = addLabels(predicted)
    labeledTest = addLabels(testFlags)
    print("\tConfusion Matrix Scores: ")
# NOTE(review): these statements use the loop variable ``i``; the enclosing
# loop header lies outside this excerpt -- re-indent to match it when merging.

# Fit the SVM and record its test accuracy
Svm.fit(Xtrain, ytrain)
ypred = Svm.predict(Xtest)
acc = accuracy_score(ytest, ypred)
accsvm.append(acc)
print("accuracy on svm= " + str(acc))
# keep the best value seen so far, and the iteration that produced it
if acc > bestsvm:
    bestsvm = acc
    j = i

# neural network with one hidden layer; the number of perceptrons is i * 10
# (the original comment states it varies from 20 to 120)
NN = nn(hidden_layer_sizes=(i * 10,),  # explicit one-element tuple
        random_state=5,
        learning_rate_init=0.001,
        solver='sgd',
        max_iter=300)
NN.fit(Xtrain, ytrain)
yprednn = NN.predict(Xtest)
accnn = accuracy_score(ytest, yprednn)
accsnn.append(accnn)
print("accuracy on nn = " + str(accnn))
print('')
# keep the best value seen so far, and the layer width that produced it
if accnn > bestnn:
    bestnn = accnn
    nhidden = i * 10