def mlp_bench(x_train, y_train, x_test, fh):
    """
    Forecasts using a simple MLP with 6 nodes in the hidden layer.

    :param x_train: train input data
    :param y_train: target values for training
    :param x_test: test data
    :param fh: forecasting horizon
    :return: array of point forecasts for the next fh periods
    """
    y_hat_test = []

    model = MLPRegressor(hidden_layer_sizes=6, activation='identity',
                         solver='adam', max_iter=100, learning_rate='adaptive',
                         learning_rate_init=0.001, random_state=42)
    model.fit(x_train, y_train)

    # Recursive forecasting: feed each prediction back into the input window.
    last_prediction = model.predict(x_test)[0]
    for i in range(0, fh):
        y_hat_test.append(last_prediction)
        x_test[0] = np.roll(x_test[0], -1)
        x_test[0, (len(x_test[0]) - 1)] = last_prediction
        last_prediction = model.predict(x_test)[0]

    return np.asarray(y_hat_test)
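# A minimal usage sketch for mlp_bench above (not part of the original source):
# the window length of 3 and the toy sine series are assumptions for illustration.
import numpy as np
from sklearn.neural_network import MLPRegressor

series = np.sin(np.arange(60) / 5.0)  # hypothetical univariate series
window = 3
X = np.array([series[i:i + window] for i in range(len(series) - window)])
y = series[window:]
x_train, y_train = X[:-1], y[:-1]
x_test = X[-1:].copy()  # last observed window, shape (1, window)
forecast = mlp_bench(x_train, y_train, x_test, fh=6)
print(forecast)  # 6 recursive one-step-ahead forecasts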
def construct_train(train_length, **kwargs):
    """
    Train and test a model with the given input window and number of
    neurons in the hidden layer; returns the mean squared error.
    """
    start_cur_position = 0
    steps, steplen = observations.size // (2 * train_length), train_length

    if 'hidden_layer' in kwargs:
        network = MLPRegressor(hidden_layer_sizes=kwargs['hidden_layer'])
    else:
        network = MLPRegressor()

    quality = []

    # fit model - configure parameters
    network.fit(observations[start_cur_position:train_length][:, 1].reshape(1, train_length),
                observations[:, 1][start_cur_position:train_length].reshape(1, train_length))

    parts = []

    # calculate predicted values
    # for each step add all predicted values to a list
    # TODO: add some parallelism here
    for i in range(0, steps):
        parts.append(network.predict(observations[start_cur_position:train_length][:, 1]))
        start_cur_position += steplen
        train_length += steplen

    # estimate model quality using mean squared error
    result = np.array(parts).flatten().tolist()
    for valnum, value in enumerate(result):
        quality.append((value - observations[valnum][1]) ** 2)

    return sum(quality) / len(quality)
def test_partial_fit_regression():
    # Test partial_fit on regression.
    # `partial_fit` should yield the same results as 'fit' for regression.
    X = Xboston
    y = yboston

    for momentum in [0, .9]:
        mlp = MLPRegressor(solver='sgd', max_iter=100, activation='relu',
                           random_state=1, learning_rate_init=0.01,
                           batch_size=X.shape[0], momentum=momentum)
        with warnings.catch_warnings(record=True):
            # catch convergence warning
            mlp.fit(X, y)
        pred1 = mlp.predict(X)
        mlp = MLPRegressor(solver='sgd', activation='relu',
                           learning_rate_init=0.01, random_state=1,
                           batch_size=X.shape[0], momentum=momentum)
        for i in range(100):
            mlp.partial_fit(X, y)

        pred2 = mlp.predict(X)
        assert_almost_equal(pred1, pred2, decimal=2)
        score = mlp.score(X, y)
        assert_greater(score, 0.75)
ti = "Importance of Numeric and Categorical Encoded Features. Gradient Boosting Regressor(PCA)" display_importance(GradientBoostingRegressor(max_depth=4, n_estimators=32 * 8), X_train_cat_enc_pca, y_train_cat_enc, ti, 18) """## MLP Regressors. Scikit-Learn Fit Regressors """ mlpr = MLPRegressor(hidden_layer_sizes=(32 * 8, ), max_iter=500, solver='adam', batch_size=12, learning_rate='adaptive', verbose='True') mlpr.fit(X_train, y_train) y_train_mlpr = mlpr.predict(X_train) y_test_mlpr = mlpr.predict(X_test) scores('MLP Regressor. Numeric Features', y_train, y_test, y_train_mlpr, y_test_mlpr) mlpr_cat = MLPRegressor(hidden_layer_sizes=(32 * 8, ), max_iter=500, solver='adam', batch_size=12, learning_rate='adaptive', verbose='True') mlpr_cat.fit(X_train_cat, y_train_cat) y_train_cat_mlpr = mlpr_cat.predict(X_train_cat) y_test_cat_mlpr = mlpr_cat.predict(X_test_cat) scores('MLP Regressor. Numeric and Categorical Features', y_train_cat,
import os
import pickle


class Ann:
    '''Implementation and interface of the presented ANN functionality'''

    def __init__(self):
        self._nn = MLPRegressor(hidden_layer_sizes=(10,),
                                verbose=False,
                                warm_start=True)
        self._entradas_entrenamiento = []
        self._salidas_esperadas_entrenamiento = []
        # TD-lambda parameter
        self.lambdaCoefficient = 0.9

    def evaluar(self, entrada):
        '''Returns the network's evaluation for the given input'''
        return self._nn.predict(entrada)

    def agregar_a_entrenamiento(self, tableros, resultado):
        '''Adds the data from the game to the training examples'''
        # Present the game from the end back to the beginning
        tableros.reverse()
        for i in range(len(tableros)):
            # Board representation, estimated value
            tablero, valorEstimado = tableros[i][0], tableros[i][1]
            self._entradas_entrenamiento.append(tablero)
            if i == 0:
                # For the final position, use the game result as the expected output
                self._salidas_esperadas_entrenamiento.append(resultado.value)
            else:
                # The value to learn, as given by TD-lambda
                valorAAprender = valorEstimado + self.lambdaCoefficient * (
                    self._salidas_esperadas_entrenamiento[i - 1] - valorEstimado)
                self._salidas_esperadas_entrenamiento.append(valorAAprender)

    def entrenar(self):
        '''Runs training on the stored examples'''
        self._nn.partial_fit(self._entradas_entrenamiento,
                             self._salidas_esperadas_entrenamiento)
        self._entradas_entrenamiento = []
        self._salidas_esperadas_entrenamiento = []

    def almacenar(self):
        '''Serializes and persists the network'''
        pickle.dump(self._nn, open(self.path, 'wb'))

    def cargar(self, path, red):
        '''Deserializes an existing network or creates a new one'''
        self.path = path
        if os.path.isfile(path):
            # If the given file exists, deserialize the network
            self._nn = pickle.load(open(path, 'rb'))
        else:
            # Otherwise, initialize the given network
            self._nn = red
            tableroVacio = ([EnumCasilla.EMPTY.value for _ in range(64)], 0)
            self.agregar_a_entrenamiento([tableroVacio], EnumResultado.EMPATE)
            self.entrenar()
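# Illustrative sketch (not from the original source): how the TD-lambda target
# above behaves for one backed-up position, with made-up numbers. With
# lambdaCoefficient = 0.9, an estimated value of 0.2, and a previous expected
# output of 1.0 (a win), the value to learn moves most of the way toward 1.0.
lambdaCoefficient = 0.9
valorEstimado = 0.2
salida_anterior = 1.0  # hypothetical previous expected output
valorAAprender = valorEstimado + lambdaCoefficient * (salida_anterior - valorEstimado)
print(valorAAprender)  # 0.92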
from __future__ import print_function, division
from future.utils import iteritems
from builtins import range, input
# Note: you may need to update your version of future
# sudo pip install -U future

import numpy as np
from sklearn.neural_network import MLPRegressor
from util import getKaggleMNIST

# get data
X, _, Xt, _ = getKaggleMNIST()

# create the model and train it (an autoencoder: the inputs are the targets)
model = MLPRegressor()
model.fit(X, X)

# test the model
print("Train R^2:", model.score(X, X))
print("Test R^2:", model.score(Xt, Xt))

Xhat = model.predict(X)
mse = ((Xhat - X)**2).mean()
print("Train MSE:", mse)

Xhat = model.predict(Xt)
mse = ((Xhat - Xt)**2).mean()
print("Test MSE:", mse)
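# A hedged follow-up sketch (not in the original source): recovering the hidden
# representation of the autoencoder above from the fitted weights. MLPRegressor
# has no transform(), so the encoding is computed manually; the ReLU here
# matches MLPRegressor's default activation.
import numpy as np

Z = np.maximum(0, X.dot(model.coefs_[0]) + model.intercepts_[0])
print("Hidden representation shape:", Z.shape)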
y_pred = bayes.predict(X_test)
print("The MAE for bayes was " + str(mean_absolute_error(y_test, y_pred)))

#############################################################################
# Applying the Neural Net Regressor
nn_parameters = {'hidden_layer_sizes': [10, 20, 30, 40]}
grid_nn = GridSearchCV(MLPRegressor(solver='lbfgs'), nn_parameters, cv=3)
grid_nn.fit(X_train, y_train)
nnet = MLPRegressor(hidden_layer_sizes=grid_nn.best_params_['hidden_layer_sizes'],
                    solver='lbfgs')
nnet.fit(X_train, y_train)
y_pred = nnet.predict(X_test)
print("The MAE for nnet was " + str(mean_absolute_error(y_test, y_pred)))

#############################################################################
# Loading the test dataset from csv using pandas
data_test = pd.read_csv('test.csv', header=None)

# Converting the categorical data to numeric labels
for column in categoricos:
    data_test[column-1] = pd.Categorical(data_test[column-1]).codes

numericos_array_test = data_test[numericos-1].values

#############################################################################
# Joining the numeric and categorical test data
f2 = open("DATA/OPT_NN2.dat", "w") f3 = open("DATA/OPT_NN3.dat", "w") f3 = open("DATA/OPT_NN4.dat", "w") for layer1 in range(5, 151): for layer2 in range(1, 2): for rate in np.linspace(0.0001, 0.1, 1000): regr = MLPRegressor(hidden_layer_sizes=(layer1,),activation='relu',\ solver='adam', alpha=0.0001, batch_size='auto', \ learning_rate='constant', learning_rate_init=rate, \ power_t=0.5, max_iter=1000, shuffle=True, random_state=None, \ tol=0.0001, verbose=False, warm_start=False, momentum=0.9, \ nesterovs_momentum=True, early_stopping=False, validation_fraction=0.1,\ beta_1=0.9, beta_2=0.999, epsilon=1e-08) regr.fit(X, y) y2 = regr.predict(X2) print(layer1, layer2, rate) L1_error_xnorm_dt = sum(abs(y2[:, 0] - Y3[:, 0])) / n_test L1_error_ynorm_dt = sum(abs(y2[:, 1] - Y3[:, 1])) / n_test L1_error_area_dt = sum(abs(y2[:, 2] - Y3[:, 2])) / n_test if (L1_error_xnorm_dt < err1): f1 = open("DATA/OPT_NN1.dat", "w") f1.write('%d, ' % layer1) f1.write('%d, ' % layer2) f1.write('%f' % rate) f1.write('\n') f1.close()
# Writing name and UID
results.write("UID: " + uid + " Name: " + name + " Seed: " + str(seed) + "\n")

# Generating sample data
data, target = load_diabetes(return_X_y=True)
data_train, data_test, target_train, target_test = train_test_split(
    data, target, test_size=.25, train_size=.75)

# Error BackProp w/Regression Learning
mlp = MLPRegressor(max_iter=200, random_state=13)

#### Before Training (after 1 epoch)
mlp.partial_fit(data_train, target_train)

# RMSE Train Data
predict_train0 = mlp.predict(data_train)
rmse_train0 = rmse(target_train, predict_train0)

# RMSE Test Data
predict_test0 = mlp.predict(data_test)
rmse_test0 = rmse(target_test, predict_test0)

#### After Training
mlp.fit(data_train, target_train)

# RMSE Train Data
predict_train1 = mlp.predict(data_train)
rmse_train1 = rmse(target_train, predict_train1)

# RMSE Test Data
predict_test1 = mlp.predict(data_test)
rmse_test1 = rmse(target_test, predict_test1)
## model = pickle.load(file)

for i in range(1, 81):
    # load training data
    training_data = pd.read_csv(
        'training_datasets/training_data_{}.csv'.format(i), header=None)
    x = training_data.iloc[:, :-1]
    y = training_data.iloc[:, -1]

    # (re)train model
    model.partial_fit(x, y)

    # see how results are changing with more learning
    # expected score = lowest
    test_x = np.array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]).reshape(1, -1)
    print(model.predict(test_x))
    # expected score = low
    test_x = np.array([0, 0, 0, 0, 2, 0, 0, 2, 0, 0, 0]).reshape(1, -1)
    print(model.predict(test_x))
    # expected score = high
    test_x = np.array([0, 0, 0, 0, 2, 0, 1, 2, 0, 0, 1]).reshape(1, -1)
    print(model.predict(test_x))
    # expected score = highest
    test_x = np.array([0, 1, 1, 2, 2, 1, 2, 0, 0, 0, 0]).reshape(1, -1)
    print(model.predict(test_x))
    print('------------------------')

# dump pickled model
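# A minimal sketch (not from the original source) of how the pickled model the
# loop above loads could have been created; the filename and hyperparameters
# are assumptions for illustration.
import pickle
from sklearn.neural_network import MLPRegressor

model = MLPRegressor(hidden_layer_sizes=(50,))
with open('model.pickle', 'wb') as f:  # hypothetical path
    pickle.dump(model, f)
# Later: model = pickle.load(open('model.pickle', 'rb')), then call
# model.partial_fit(x, y) repeatedly as in the loop above; partial_fit
# initializes the weights on its first call.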
Y_tr = pheno[:1000, 1:]  # slicing pheno
#Y_va = pheno[201:250,:]
Y_te = pheno[1001:, 1:]

diabetes_X_train = X_tr
diabetes_X_test = X_te
diabetes_y_train = Y_tr
diabetes_y_test = Y_te

reg = MLPRegressor(hidden_layer_sizes=(1, ), solver='lbfgs')
reg.fit(X_tr, Y_tr)
scores = cross_val_score(reg, geno[:, 1:], pheno[:, 1:], cv=10)

#Result_Y = np.zeros((249,1), dtype='float64')
Result_Y = reg.predict(X_te)
#Yte = np.array(Y_te, dtype=np.float64)
r_row, p_score = pearsonr(Result_Y, Y_te)

# The mean squared error
print("Residual sum of squares: %.2f"
      % np.mean((reg.predict(diabetes_X_test) - diabetes_y_test) ** 2))
# Explained variance score: 1 is perfect prediction
print('Variance score: %.2f' % reg.score(diabetes_X_test, diabetes_y_test))

print(Result_Y)
print(scores)
print(Result_Y.shape)
print(r_row)
print(p_score)
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPRegressor

data = pd.read_csv("temp.csv")
data.head()

X = data['temp'].values
Y = data['hum'].values
X = np.reshape(X, (-1, 1))

X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.30)

rna = MLPRegressor(solver='lbfgs', activation='logistic',
                   max_iter=100000, hidden_layer_sizes=(15))
rna.fit(X_train, y_train)
Y2 = rna.predict(X_test)
print(Y2)
print("Coefficient of determination: ", rna.score(X_test, y_test))

import matplotlib.pyplot as plt
plt.plot(X_test, y_test, 'bo', label='Original')
plt.plot(X_test, Y2, 'ro', label='Prediction')
plt.legend(loc='upper right')
plt.ylabel('Humidity')
plt.xlabel('Temperature')
plt.show()
def example_data(rows=100):
    x = np.linspace(start=0, stop=30, num=rows).reshape((rows, 1))
    #y = 2*np.sin(x) + x
    y = np.sqrt(x)

    # scale the data
    x = (x - 15) / 10
    y = y / 10
    return x, y.reshape(len(y))


X, y = example_data()

model = MLPRegressor(hidden_layer_sizes=(50, 5),
                     activation='relu',
                     shuffle=False,
                     batch_size=len(y),
                     solver='sgd',
                     alpha=0,
                     learning_rate='constant',
                     learning_rate_init=0.0001,
                     max_iter=100000,
                     validation_fraction=0)

model.fit(X=X, y=y)
print(model.loss_)

yhat = model.predict(X)
plt.plot(y)
plt.plot(yhat)
plt.show()
from sklearn.neural_network import MLPRegressor
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import mean_squared_error

data = pd.read_csv('network_backup_dataset.csv')
train = data.loc[:, ['WeekNumber', 'DayofWeek', 'BackupStartTime',
                     'WorkFlowID', 'FileName', 'BackupTime']]
target = data.loc[:, ['SizeofBackup']]

mlp = MLPRegressor(solver='sgd', hidden_layer_sizes=150,
                   max_iter=200, shuffle=False, random_state=1)
mlp.fit(train, target)
prediction = mlp.predict(train)

plt.plot(prediction, label='Prediction', color='red')
plt.plot(target, label='Real Data', color='blue')
plt.title('Copy Size versus Time based on Neural Network Regression')
plt.xlabel('Time')
plt.ylabel('Copy Size')
plt.legend()
plt.show()

rmse = mean_squared_error(target.SizeofBackup, prediction) ** 0.5
print(rmse)
def main():
    # Sets working directory
    os.chdir("/project/csbio/henry/Documents/projects/draftsim/MTG")
    output_folder = ("ml")

    # Sets seed for reproducibility
    seed = 5001

    # Reads in mtgJSON data
    setName = 'GRN'
    jsonSubset = None
    with open('../data/all_sets.json', 'r', encoding='utf-8') as json_data:
        mtgJSON = json.load(json_data)
        jsonSubset = mtgJSON[setName]['cards']
        if setName == 'XLN':
            jsonSubset = jsonSubset + mtgJSON['RIX']['cards']

    # Converts cards to dict with lowercase names as indices for cards
    this_set = {utils.getName(card): card for card in jsonSubset}
    this_set = dict((k.lower(), v) for k, v in this_set.items())
    cardlist = list(this_set.keys())

    # Reads in draftsim data and formats it
    rec_data = pd.read_csv("../data/GRNrecdata.csv",
                           names=["deck", "pack", "pick"])
    rec_data = rec_data.drop(["deck"], axis=1)
    rec_data["pack"] = [
        re.sub(r'_\d+', '', x).lower() for x in rec_data["pack"]
    ]
    rec_data["pick"] = [
        re.sub(r'_\d+', '', x).lower() for x in rec_data["pick"]
    ]

    # One-hot encodes draftsim data
    labels = dict(zip(cardlist, range(len(cardlist))))
    rec_data["pick"] = [labels[x] for x in rec_data["pick"]]
    rec_data["pack"] = rec_data["pack"].astype(object)
    rec_data["pick"] = rec_data["pick"].astype(object)
    rec_data["pack"] = [ast.literal_eval(x) for x in rec_data["pack"]]
    formatted = rec_data
    for index, row in rec_data.iterrows():
        pick_encode = [0 for i in range(len(cardlist))]
        pack_encode = [0 for i in range(len(cardlist))]
        pick_encode[row["pick"]] = 1
        for name in row["pack"]:
            pack_encode[labels[name]] = 1
        formatted.at[index, "pick"] = pick_encode
        formatted.at[index, "pack"] = pack_encode
    final = np.zeros(formatted.shape[0],
                     dtype=[('x', 'int', len(cardlist)),
                            ('y', 'int', len(cardlist))])
    for i in range(formatted.shape[0]):
        final["x"][i] = [el for el in formatted["pack"][i]]
        final["y"][i] = [el for el in formatted["pick"][i]]

    # Converts to training/test data with an 80/20 split
    x_train, x_test, y_train, y_test = train_test_split(final["x"],
                                                        final["y"],
                                                        test_size=0.2,
                                                        random_state=seed)

    # Trains an MLP regressor
    model = MLPRegressor()
    grid = dict(activation=["relu"],
                solver=["adam"],
                hidden_layer_sizes=[(500, 1000, 500)],
                alpha=[1e-5, 1e-3, 0.1, 10],
                random_state=[seed],
                early_stopping=[True],
                max_iter=[50])
    model = GridSearchCV(model, param_grid=grid, verbose=True, n_jobs=16, cv=5)
    model.fit(x_train, y_train)
    print(model.best_params_)

    # Gets training and testing metrics: a pick counts as correct when the
    # true pick is among the top two ranked cards in the pack
    train_predictions = np.asarray(model.predict(x_train))
    test_predictions = np.asarray(model.predict(x_test))
    train_correct = 0
    test_correct = 0
    for i in range(train_predictions.shape[0]):
        choices = np.where(x_train[i] == 1)
        predictions = train_predictions[i][choices]
        correct_ind = np.where(y_train[i][choices] == 1)[0]
        order = np.argsort(predictions)[::-1]  # best-ranked picks first
        if len(order) > 1:
            if order[0] in correct_ind or order[1] in correct_ind:
                train_correct += 1
        else:
            if order[0] in correct_ind:
                train_correct += 1
    for i in range(test_predictions.shape[0]):
        choices = np.where(x_test[i] == 1)
        predictions = test_predictions[i][choices]
        correct_ind = np.where(y_test[i][choices] == 1)[0]
        order = np.argsort(predictions)[::-1]  # best-ranked picks first
        if len(order) > 1:
            if order[0] in correct_ind or order[1] in correct_ind:
                test_correct += 1
        else:
            if order[0] in correct_ind:
                test_correct += 1
    print(train_correct)
    print(float(train_correct) / float(len(train_predictions)) * 100)
    print(float(test_correct) / float(len(test_predictions)) * 100)
mlp = MLPRegressor(hidden_layer_sizes=(4, 4, 4), activation='relu',
                   solver='adam', max_iter=500)

lm.fit(x_train, y_train)
mlp.fit(x_train, y_train)
clf.fit(x_train, y_train)
dtr.fit(x_train, y_train)
rdf.fit(x_train, y_train)
svmmodel.fit(x_train, y_train)

from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error

print("Linear Regression Model Accuracy : %f" %
      (r2_score(y_test, lm.predict(x_test))))
print("Multilayer Perceptron Regression Model Accuracy : %f" %
      (r2_score(y_test, mlp.predict(x_test))))
print("Support Vector Machine Regression Model Accuracy : %f" %
      (r2_score(y_test, svmmodel.predict(x_test))))
print("Decision Tree Regression Model Accuracy : %f" %
      (r2_score(y_test, dtr.predict(x_test))))
print("Gradient Boosting Regression Model Accuracy : %f" %
      (r2_score(y_test, clf.predict(x_test))))
print("Random Forest Regression Model Accuracy : %f" %
      (r2_score(y_test, rdf.predict(x_test))))

print("All Models' Mean Squared Error")
print("Linear Regression Model MSE Error : %f" %
      (mean_squared_error(y_test, lm.predict(x_test))))
print("Multilayer Perceptron Regression Model MSE Error : %f" %
      (mean_squared_error(y_test, mlp.predict(x_test))))
print("Support Vector Machine Regression Model MSE Error : %f" %
      (mean_squared_error(y_test, svmmodel.predict(x_test))))
# print('\nModel - KNeighborsRegressor')  # KNeighborsRegressor
# a = time.process_time()
# modelo_svr = KNeighborsRegressor()
# modelo_svr = modelo_svr.fit(x_treino, y_treino)
# predicoes_svr = modelo_svr.predict(x_teste)
# qualidade_svr0 = mean_squared_error(y_teste, predicoes_svr)
# del modelo_svr, predicoes_svr
# resultados['KNeighborsRegressor \t\t'] = qualidade_svr0
# print(f'Time spent: {time.process_time() - a} s')
#
# print('\nModel - MLPRegressor')  # MLPRegressor
a = time.process_time()
modelo_svr = MLPRegressor(alpha=1, max_iter=500)
modelo_svr = modelo_svr.fit(x_treino, y_treino)
predicoes_svr = modelo_svr.predict(x_teste)
qualidade_svr0 = mean_squared_error(y_teste, predicoes_svr)
# del modelo_svr, predicoes_svr
resultados['MLP alpha=1: \t\t'] = qualidade_svr0
print(f'Time spent: {time.process_time() - a} s')
#
# print('\nModel - MLPRegressor')  # MLPRegressor
# a = time.process_time()
# modelo_svr = MLPRegressor(solver='sgd')
# modelo_svr = modelo_svr.fit(x_treino, y_treino)
# predicoes_svr = modelo_svr.predict(x_teste)
# qualidade_svr0 = mean_squared_error(y_teste, predicoes_svr)
# del modelo_svr, predicoes_svr
# resultados['MLP sgd: \t\t'] = qualidade_svr0
# print(f'Time spent: {time.process_time() - a} s')
df = pd.DataFrame(Q, columns=['index', 'prime'])
ax1 = df.plot.scatter(x='index', y='prime', c='DarkBlue')

################################### NEURAL NETWORK ####################################################
neural_net = MLPRegressor([500, 500], random_state=9,
                          max_iter=2000).fit(X[:, :-1], X[:, -1])

################################### TESTING DATA ##################################################
testingdata = indexedprimesfrom2to(1000000)
residuals = []
Y = []
percent_error = []
for i in range(0, len(testingdata)):
    nnresult = float(neural_net.predict([[i]]))
    actualnumber = testingdata[i][1]
    Y += [[i + 1, nnresult]]
    residuals += [[i + 1, nnresult - actualnumber]]
    percent_error += [[
        i + 1, 100 * abs(nnresult - actualnumber) / actualnumber
    ]]

df2 = pd.DataFrame(percent_error, columns=['index', 'percenterror'])
ax2 = df2.plot.scatter(x='index', y='percenterror', c='DarkBlue')
print(percent_error)
uri = 'https://archive.ics.uci.edu/ml/machine-learning-databases/00244/fertility_Diagnosis.txt'
X, y = load_csv(uri, ',', 0, 9, 9, 10, True)
y = pd.get_dummies(y.ravel(), drop_first=True)

''' Split into training and test set '''
X_train, X_test, y_train, y_test = train_test_split(X, y,
                                                    test_size=0.2,
                                                    random_state=1)

''' Feature scaling '''
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)
sc_y = StandardScaler()
y_train = sc_y.fit_transform(y_train)
y_test = sc_y.transform(y_test)

''' Fit MLPRegressor with the data '''
regressor = MLPRegressor(hidden_layer_sizes=(100, 50))
regressor.fit(X_train, y_train)

''' Predicting result '''
y_pred = regressor.predict(X_test)
rmse = sqrt(mean_squared_error(y_test, y_pred))
class QN(object):
    def __init__(self, num_inputs, num_outputs):
        self.nx = num_inputs
        self.ny = num_outputs
        self.net = MLPRegressor(hidden_layer_sizes=(50, 10),
                                max_iter=1,
                                solver='sgd',
                                learning_rate='constant',
                                learning_rate_init=0.001,
                                warm_start=True,
                                momentum=0.9,
                                nesterovs_momentum=True
                                )
        self.initialize_network()

        # set experience replay
        self.mbsize = 128  # mini-batch size
        self.er_s = []
        self.er_a = []
        self.er_r = []
        self.er_done = []
        self.er_sp = []

        self.er_size = 2000  # total size of ER, implemented as a circular buffer
        self.whead = 0  # write head

    def initialize_network(self):
        # function to initialize network weights
        xtrain = np.random.rand(256, self.nx)
        ytrain = 10 + np.random.rand(256, self.ny)
        self.net.fit(xtrain, ytrain)

    def update_network(self):
        # function updates network by sampling a mini-batch from the ER
        # Prepare train data
        chosen = list(np.random.randint(len(self.er_s),
                                        size=min(len(self.er_s), self.mbsize)))
        Xtrain = np.asarray([self.er_s[i] for i in chosen])

        # calculate target
        target = np.random.rand(len(chosen), self.ny)
        for j, i in enumerate(chosen):
            # do a forward pass through s and sp
            Q_s = self.net.predict(self.er_s[i].reshape(1, -1))
            Q_sp = self.net.predict(self.er_sp[i].reshape(1, -1))
            target[j, :] = Q_s  # target initialized to current prediction
            if (self.er_done[i] == True):
                target[j, self.er_a[i]] = self.er_r[i]  # if end of episode, target is terminal reward
            else:
                target[j, self.er_a[i]] = self.er_r[i] + 0.9 * max(max(Q_sp))  # Q_sp is a list of lists

        # fit the network
        self.net.fit(Xtrain, target)  # single step of SGD

    def append_memory(self, s, a, r, sp, done):
        if (len(self.er_s) < self.er_size):
            self.er_s.append(s)
            self.er_a.append(a)
            self.er_r.append(r)
            self.er_sp.append(sp)
            self.er_done.append(done)
            self.whead = (self.whead + 1) % self.er_size
        else:
            self.er_s[self.whead] = s
            self.er_a[self.whead] = a
            self.er_r[self.whead] = r
            self.er_sp[self.whead] = sp
            self.er_done[self.whead] = done
            self.whead = (self.whead + 1) % self.er_size
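# A hedged usage sketch (not part of the original source): one step of the
# Q-learning loop the class above supports. The state/reward values here are
# random stand-ins for a Gym-style environment's reset()/step() outputs.
qn = QN(num_inputs=4, num_outputs=2)
s = np.random.rand(4)                                  # stand-in for env.reset()
a = int(np.argmax(qn.net.predict(s.reshape(1, -1))))   # greedy action
sp, r, done = np.random.rand(4), 1.0, False            # stand-in for env.step(a)
qn.append_memory(s, a, r, sp, done)
qn.update_network()                                    # one SGD step on a replay mini-batch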
loo = LeaveOneOut()
loo.get_n_splits(X)

for train_index, test_index in loo.split(X):
    # print("TRAIN:", train_index, "TEST:", test_index)
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]

    scaler = StandardScaler()
    scaler.fit(X_train)
    X_train = scaler.transform(X_train)
    # print(X_train)
    X_test = scaler.transform(X_test)

    mlp = MLPRegressor(hidden_layer_sizes=(8, 8, 8), max_iter=5000,
                       solver='lbfgs')
    mlp.fit(X_train, y_train)

    predictions = mlp.predict(X_test)
    # print(X_test)
    # print(mlp.n_iter_)

    predictions_int = predictions.astype(int)
    y_test_int = y_test.astype(int)
    print(predictions_int)
    print(y_test_int)

    # penalize over-prediction: lose 0.06 per unit of overshoot, up to 15 units
    score = 1
    if (predictions_int > y_test_int and predictions_int <= y_test_int + 15):
        diff = predictions_int - y_test_int
        while (diff > 0):
            score -= 0.06
            diff -= 1
    print(score)
train = train.drop('Genre', axis=1)

from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(train, feature,
                                                    test_size=0.30)

from sklearn.neural_network import MLPRegressor
clf = MLPRegressor(hidden_layer_sizes=(5, ),
                   activation='relu',
                   solver='adam',
                   learning_rate='adaptive',
                   max_iter=1000,
                   learning_rate_init=0.01,
                   alpha=0.01)
clf.fit(X_train, y_train)
res = clf.predict(X_test)

# threshold the regression output at 0.5 to obtain class labels
lsd = []
for item in res:
    if item >= 0.5:
        lsd.append(1)
    else:
        lsd.append(0)

from sklearn.metrics import accuracy_score
print(accuracy_score(lsd, y_test))
if (method == 5):
    print('GradientBoosting 01')
    str_method = 'GradientBoosting01'
    r = GradientBoostingRegressor(n_estimators=95,
                                  max_depth=6,
                                  learning_rate=0.04,
                                  random_state=ra2,
                                  verbose=0,
                                  warm_start=True,
                                  subsample=0.7,
                                  max_features=0.8)
    r.fit(x1[col], y1)
    a1 = NWRMSLE(y2, r.predict(x2[col]), x2['perishable'])
    # part of the output file name
    N1 = str(a1)

    test['transactions'] = r.predict(test[col])
    test['transactions'] = test['transactions'].clip(lower=0. + 1e-12)

col = [c for c in x1 if c not in ['id', 'unit_sales', 'perishable']]
y1 = x1['unit_sales'].values
y2 = x2['unit_sales'].values

# set a new seed to generate random numbers
ra2 = round(method + 31 * method + 51 * method)
np.random.seed(ra2)

if (method == 1):
y_pred_dtr = dtr_energy.predict(X_test_energy_stand)
print("Mean squared error for DTR: {:.3f}.".format(
    mean_squared_error(y_pred_dtr, y_test_energy)))

# Random Forest Regressor
from sklearn.ensemble import RandomForestRegressor as RFR
rfr_energy = RFR(n_estimators=100,
                 min_samples_leaf=2,
                 max_leaf_nodes=1000,
                 random_state=37).fit(X_train_energy, y_train_energy)
y_pred_rfr = rfr_energy.predict(X_test_energy)
print("Mean squared error for RFR: {:.3f}.".format(
    mean_squared_error(y_pred_rfr, y_test_energy)))

# Support Vector Regressor
from sklearn.svm import SVR
svr_energy = SVR().fit(X_train_energy_stand, y_train_energy)
y_pred_svr = svr_energy.predict(X_test_energy_stand)
print("Mean squared error for SVR: {:.3f}.".format(
    mean_squared_error(y_pred_svr, y_test_energy)))

# Multi-layer Perceptron Regressor
from sklearn.neural_network import MLPRegressor as MLPR
mlpr_energy = MLPR(hidden_layer_sizes=(100, 100),
                   alpha=.3,
                   random_state=37,
                   beta_1=.89,
                   beta_2=.9995).fit(X_train_energy_stand, y_train_energy)
y_pred_mlpr = mlpr_energy.predict(X_test_energy_stand)
print("Mean squared error for MLPR: {:.3f}.".format(
    mean_squared_error(y_pred_mlpr, y_test_energy)))

print('energy')
model = MLPRegressor(hidden_layer_sizes=(1, ),
                     solver='sgd',
                     early_stopping=False,
                     max_iter=1000).fit(x_train, y_train)

# In[8]:

print("{:.2%}".format(model.score(x_train, y_train)))

# In[9]:

print("{:.2%}".format(model.score(x_test, y_test)))

# In[10]:

# plot prediction and actual data
y_pred = model.predict(x_test)
plt.plot(y_test, y_pred, '.')

# plot a line; a perfect prediction would fall entirely on this line
x = np.linspace(-2, 2.5, 2)
y = x
plt.plot(x, y)
plt.show()

# In[11]:

print(model.coefs_)

# In[ ]:
# generates data & splits it into X (training input) and y (target output)
X = library2[:, 0:5]
y = library2[:, 6]
#print(X)
#print(y)

neurons = 20  # <- number of neurons in the hidden layer
eta = 0.1     # <- the learning rate parameter

# here we create the MLP regressor
mlp = MLPRegressor(hidden_layer_sizes=(neurons, ),
                   verbose=True,
                   learning_rate_init=eta)

# here we train the MLP, refitting until the score is non-negative
mlp.fit(X, y)
while (mlp.score(X, y) < 0):
    mlp.fit(X, y)

# in-sample error on the training set
print("Training set score: %f" % mlp.score(X, y))

# now we build new data as a testing set
Xtest = np.array([[genreScore, hourScore, critScore, userScore, pubScore]])
#print("Testing set score: %f" % mlp.score(X, y))
ypred = mlp.predict(Xtest)
fResult = float(ypred)
rResult = round(fResult)
print(ypred)
print("Final Score: %f" % rResult)
best_linear_regressor = LinearRegression(copy_X=True, fit_intercept=True,
                                         normalize=True)
best_linear_regressor.fit(x_train_scaled, y_train)
y_pred = best_linear_regressor.predict(x_test_scaled)
print('MSE for Linear Regressor: ' + str(mean_squared_error(y_test, y_pred)))

# In[27]:

best_neural_network_regressor = MLPRegressor(activation='tanh',
                                             alpha=0.0001,
                                             hidden_layer_sizes=10,
                                             learning_rate='constant',
                                             learning_rate_init=0.01,
                                             random_state=0)
best_neural_network_regressor.fit(x_train_scaled, y_train)
y_pred = best_neural_network_regressor.predict(x_test_scaled)
print('MSE for Neural Network Regressor: ' +
      str(mean_squared_error(y_test, y_pred)))

# In[28]:

best_gaussian_regressor = GaussianProcessRegressor(
    kernel=1**2 * RationalQuadratic(alpha=0.1, length_scale=1))
best_gaussian_regressor.fit(x_train_scaled, y_train)
y_pred = best_gaussian_regressor.predict(x_test_scaled)
print('MSE for Gaussian Regressor: ' + str(mean_squared_error(y_test, y_pred)))

# In[ ]:
def mean_absolute_percentage_error(y_true, y_pred):
    y_true, y_pred = np.array(y_true), np.array(y_pred)
    return np.mean(np.abs((y_true - y_pred) / y_true)) * 100


from sklearn import metrics

y_true = test_preNreal5['real']
y_pred = test_preNreal5['pre']
print(np.sqrt(metrics.mean_squared_error(y_true, y_pred)))
print(mean_absolute_percentage_error(y_true, y_pred))

from sklearn.neural_network import MLPRegressor
ANN = MLPRegressor(learning_rate_init=0.001,
                   batch_size=20,
                   tol=0.01,
                   learning_rate='constant',
                   hidden_layer_sizes=(1000, ),
                   solver='adam')
ANN.fit(train_feat, train_label)
model_pre = ANN.predict(test_feat)

test_preNreal5 = pd.DataFrame()
test_preNreal5['real'] = test_label.flatten()
test_preNreal5['pre'] = model_pre
test_preNreal5.to_csv('ANN_wow.csv', index=False)

y_true = test_preNreal5['real']
y_pred = test_preNreal5['pre']
# -*- coding: utf-8 -*-
import pandas as pd

df = pd.read_csv("..\\Data\\health_insurance_2.csv")
features = df.iloc[:, 0:1].values
target = df.iloc[:, 1:2].values

from sklearn.preprocessing import StandardScaler
scaler_x = StandardScaler()
features = scaler_x.fit_transform(features)
scaler_y = StandardScaler()
target = scaler_y.fit_transform(target)

from sklearn.neural_network import MLPRegressor
regression = MLPRegressor()
regression.fit(features, target.ravel())
score_1 = regression.score(features, target)

import matplotlib.pyplot as plt
plt.scatter(features, target)
plt.plot(features, regression.predict(features), color='red')
plt.title("Neural Net Regression")
plt.xlabel("Age")
plt.ylabel("Cost")

# scaler.inverse_transform -> back to the real scale; reuse the fitted
# scaler_x with transform(), not fit_transform()
prediction = scaler_y.inverse_transform(
    regression.predict(scaler_x.transform([[40]])).reshape(-1, 1))
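# Hedged illustration (not in the original source) of why the prediction step
# must reuse the fitted scaler: transform() applies the training-time mean and
# scale, while a fresh fit_transform([[40]]) would map any single value to 0.
check = scaler_x.transform([[40]])
print(check)  # 40 expressed in training-set standard deviations from the mean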
# Applying MLPRegressor Model
'''
sklearn.neural_network.MLPRegressor(hidden_layer_sizes=(100, ), activation='relu',
                                    solver='adam', alpha=0.0001, batch_size='auto',
                                    learning_rate='constant', learning_rate_init=0.001,
                                    power_t=0.5, max_iter=200, shuffle=True,
                                    random_state=None, tol=0.0001, verbose=False,
                                    warm_start=False, momentum=0.9,
                                    nesterovs_momentum=True, early_stopping=False,
                                    validation_fraction=0.1, beta_1=0.9, beta_2=0.999,
                                    epsilon=1e-08, n_iter_no_change=10)
'''

MLPRegressorModel = MLPRegressor(activation='tanh',         # can also be identity, logistic, relu
                                 solver='lbfgs',            # can also be sgd, adam
                                 learning_rate='constant',  # can also be invscaling, adaptive
                                 early_stopping=False,
                                 alpha=0.0001,
                                 hidden_layer_sizes=(100, 3),
                                 random_state=33)
MLPRegressorModel.fit(X_train, y_train)

# Calculating Details
print('MLPRegressorModel Train Score is : ', MLPRegressorModel.score(X_train, y_train))
print('MLPRegressorModel Test Score is : ', MLPRegressorModel.score(X_test, y_test))
print('MLPRegressorModel loss is : ', MLPRegressorModel.loss_)
print('MLPRegressorModel No. of iterations is : ', MLPRegressorModel.n_iter_)
print('MLPRegressorModel No. of layers is : ', MLPRegressorModel.n_layers_)
print('MLPRegressorModel last activation is : ', MLPRegressorModel.out_activation_)
#print('----------------------------------------------------')

# Calculating Prediction
y_pred = MLPRegressorModel.predict(X_test)
print('Predicted Value for MLPRegressorModel is : ', y_pred[:10])
neuralnet = MLPRegressor(momentum=0.9, nesterovs_momentum=True, power_t=0.5,
                         random_state=1, shuffle=True, solver='adam', tol=0.0001,
                         validation_fraction=0.1, verbose=False, warm_start=True)

batches = iter_minibatches(sparse_matrix, prices, chunksize=1000)
count = 0
for X_chunk, y_chunk in batches:
    print(count)
    count += 1
    if len(X_chunk) != 0:
        neuralnet.partial_fit(X_chunk, y_chunk)

valmat = sparse_matrix[999999:].todense()
valprices = get_price_list(train)
print(valmat.shape)
print(valprices.shape)
predicted_prices = neuralnet.predict(valmat)
print('Prices predicted', time.time() - start)
print(valprices.shape)
print(predicted_prices.shape)
print("The score is:", calc_score(valprices, predicted_prices))
y_test = y_test.astype(np.float32)

mlp = MLPRegressor(
    activation='relu', alpha=1e-05, batch_size='auto',
    beta_1=0.9, beta_2=0.999, early_stopping=False,
    epsilon=1e-08, hidden_layer_sizes=(400, 600),
    learning_rate='constant', learning_rate_init=0.001,
    max_iter=200, momentum=0.9
)
mlp.fit(X_train, y_train)
y_test_predict = mlp.predict(X_test)

fig = matplotlib.pyplot.gcf()
fig.set_size_inches(12, 6)
n_faces = 5
n_cols = 5
image_shape = (64, 64)
for i in range(n_faces):
    true_face = np.hstack((X_test[i], y_test[i]))
    if i:
        sub = plt.subplot(n_faces, n_cols, i * n_cols + 1)
    else:
        sub = plt.subplot(n_faces, n_cols, i * n_cols + 1, title="Real")
trainX = trainX / np.max(trainX, axis=0)

rows = []
for k in test:
    row = test[k]
    rows.append(row)

# create [samples x input] numpy array, one row for each test data point
testX = np.array([row for row in rows])
testX = testX / np.max(testX, axis=0)
testY = y[-testSize:]

clf = MLPRegressor(solver='lbfgs', alpha=1e-5,
                   hidden_layer_sizes=(5, 2), random_state=1)

clf.fit(trainX, trainY)
print(clf.score(testX, testY))
yHats = clf.predict(testX)
yHats = np.round(yHats)
print(yHats)
print(testY)
print('\n')
print('Misclassified: ' + str(sum(abs(testY - yHats))) + ' out of ' +
      str(testSize) + ' = ' +
      str(float(abs(sum(testY - yHats))) / float(testSize)))
class NeuralNetwork:
    ################# Fields #######################
    # dataset_filename: string - path to dataset
    # header: list - header of the dataset
    # enumerable_columns: list - the enumerable columns
    # df: matrix - data set
    # training_set: matrix - training set
    # test_set: matrix - test set
    # TSnew_X: matrix - training set of TSnew (see documentation)
    # TSnew_Y: matrix - training set of TSnew (see documentation)
    # dim_random_subset: int - number of features to set to 0 (see documentation)
    # repeatSometimes: int - number of for cycles (see documentation)

    def __init__(self, repeatSometimes=2, dim_random_subset=2):
        # variables initialization
        self.enumerable_columns = []
        self.dataset_filename = ""
        self.header = []
        self.df = pandas.DataFrame()
        self.trainSet = pandas.DataFrame()
        self.testSet = pandas.DataFrame()
        self.TSnew_X = pandas.DataFrame()
        self.TSnew_Y = pandas.DataFrame()
        self.repeatSometimes = repeatSometimes
        self.dim_random_subset = dim_random_subset

        # This code really needs much time, so some computations are cached on disk
        if not os.path.isfile('trainSet{}-{}.csv'.format(repeatSometimes, dim_random_subset)):
            self.readDataset()
            self.discretization()
            self.preprocess()

            # creating TSnew
            self.createTrainingAndTestSet()
            self.createTSnew()

            # backup encoded sets
            self.writeCSV()
        else:
            self.readCSV()

        # training and test
        self.train()
        self.predict()

    def readDataset(self):
        print("DEB Read dataset")
        with open('header.txt') as f:
            self.header = f.read().split(',')
            print(self.header)
        with open('dataset.txt') as f:
            self.dataset_filename = f.read()
            print(self.dataset_filename)
        self.df = pandas.read_csv(self.dataset_filename, names=self.header)
        print('Dataset with {} entries'.format(self.df.__len__()))

    ############# Preprocessing ##########################
    # helper function (should not be called from other functions)
    def discretize(self, column):
        print("DEB Discretize column")
        sorted_col = sorted(column)
        l = len(column)
        n = int(numpy.floor(l / 2))
        if l % 2 == 0:
            median_1 = numpy.median(sorted_col[0:n])
            median_2 = numpy.median(sorted_col[n:])
        else:
            median_1 = numpy.median(sorted_col[0:(n + 1)])
            median_2 = numpy.median(sorted_col[(n + 1):])
        iqr = median_2 - median_1
        h = 2 * iqr * (1 / numpy.cbrt(l))  # Freedman-Diaconis bin width
        if h > 0:
            bins_number = int(numpy.ceil((column.max() - column.min()) / h))
            new_col, bins = pandas.cut(column, bins_number, labels=False,
                                       retbins=True, include_lowest=False)
        else:
            new_col = column
            bins = []
        return new_col, bins

    # helper function (should not be called from other functions)
    def normalize(self, column):
        print("DEB Normalize")
        h = abs(column.min())
        new_col = column + h
        return new_col

    def discretization(self):
        print("DEB Discretization")
        replacements = {}
        bins = {}
        for i in range(0, self.df.shape[1]):  # for each feature
            bins[i] = []
            col = self.df.values[:, i]
            flag_str = False
            flag_float = False
            flag_negative = False
            for j in col:
                if type(j) is str:
                    flag_str = True
                elif type(j) is float:
                    flag_float = True
                elif type(j) is int and j < 0:
                    flag_negative = True
            if flag_str:
                continue
            elif flag_negative:
                new_col = self.normalize(col)
                replacements[i] = new_col
                bins[i] = []
            elif flag_float:
                new_col, new_bins = self.discretize(col)
                replacements[i] = new_col
                bins[i] = new_bins
        for k, v in replacements.items():
            self.df.iloc[:, k] = v

    def preprocess(self, removeColumnsWithMissingValues=False):
        print("DEB Preprocessing")
        m = self.df.values
        # it is possible to encode enumerable features and to remove missing values
        with open('enumerable_columns.txt') as f:  # e.g., self.enumerable_columns = [0, 5, 8]
            self.enumerable_columns = f.read()
        if self.enumerable_columns.__contains__(','):
            self.enumerable_columns = list(map(int, self.enumerable_columns.split(',')))
        else:
            self.enumerable_columns = [int(self.enumerable_columns)]
        print("enumerable columns are: " + str(self.enumerable_columns))
        le = preprocessing.LabelEncoder()
        for col in self.enumerable_columns:  # if the column is enumerable
            self.df[self.header[col]] = le.fit_transform(self.df[self.header[col]])  # A -> 0, B -> 1, ...

        # remove cols with missing values (NaN), even though you risk reducing the dataset too much
        if removeColumnsWithMissingValues:
            for i in range(0, m.shape[1]):
                if pandas.isnull(m[:, i]).any():
                    self.df = numpy.delete(self.df, i, axis=1)  # delete column

    ############## MLP architecture #######################
    def createTrainingAndTestSet(self):
        print("DEB Create Training set. Using an 80-20% split")
        self.trainSet, self.testSet = train_test_split(self.df, test_size=0.20)

    # heart of the algorithm!
    def createTSnew(self):
        print("DEB Create TS new")
        for i in range(0, self.trainSet.shape[0]):
            for j in range(0, self.repeatSometimes):
                # choose a small random subset of features X_hat
                X_hat = [int(self.trainSet.shape[1] * random.random())
                         for i in range(0, self.dim_random_subset)]

                # insert into TSnew the sample: (x1...X_hat = 0 ... xk ; x1...xk)
                row = numpy.copy(self.trainSet.values[i, :])
                for feature in X_hat:
                    # set the random features to 0; X_hat holds the indices of such features
                    row[feature] = 0
                self.TSnew_X = self.TSnew_X.append(
                    pandas.DataFrame(row.reshape(-1, len(row))))  # append row to TSnew_X
                copy = numpy.copy(self.trainSet.values[i, :])
                self.TSnew_Y = self.TSnew_Y.append(
                    pandas.DataFrame(copy.reshape(-1, len(copy))))  # Y = x1...xk

    ############## Train & Predict ########################
    def train(self):
        print("DEB Training with TSnew")
        self.MLP = MLPRegressor(activation='relu', alpha=1e-05, batch_size='auto',
                                beta_1=0.9, beta_2=0.999, early_stopping=False,
                                epsilon=1e-08,
                                hidden_layer_sizes=len(self.TSnew_Y.columns),
                                learning_rate='constant', learning_rate_init=0.001,
                                max_iter=200, momentum=0.9, nesterovs_momentum=True,
                                power_t=0.5, random_state=1, shuffle=True,
                                solver='lbfgs', tol=0.0001, validation_fraction=0.1,
                                verbose=False, warm_start=False)
        self.MLP.fit(self.TSnew_X, self.TSnew_Y)

    def predict(self):
        print("DEB Test")
        testSetNew_X = pandas.DataFrame()
        testSetNew_Y = pandas.DataFrame()

        # preparing the test set - same procedure as in createTSnew:
        if not os.path.isfile('testSetNew_X{}-{}.csv'.format(self.repeatSometimes, self.dim_random_subset)):
            for i in range(0, self.testSet.shape[0]):
                # choose a small random subset of features X_hat
                X_hat = [int(self.testSet.shape[1] * random.random())
                         for i in range(0, self.dim_random_subset)]

                # insert into TSnew the sample: (x1...X_hat = 0 ... xk ; x1...xk)
                row = numpy.copy(self.testSet.values[i, :])
                for feature in X_hat:
                    # set the random features to 0; X_hat holds the indices of such features
                    row[feature] = 0
                testSetNew_X = testSetNew_X.append(pandas.DataFrame(row.reshape(-1, len(row))))
                copy = numpy.copy(self.testSet.values[i, :])
                testSetNew_Y = testSetNew_Y.append(pandas.DataFrame(copy.reshape(-1, len(copy))))  # Y = x1...xk
            testSetNew_X.to_csv('testSetNew_X{}-{}.csv'.format(self.repeatSometimes, self.dim_random_subset))
            testSetNew_Y.to_csv('testSetNew_Y{}-{}.csv'.format(self.repeatSometimes, self.dim_random_subset))
        else:
            # if the needed DataFrames have already been calculated, simply load them from disk
            testSetNew_X = pandas.read_csv('testSetNew_X{}-{}.csv'.format(self.repeatSometimes, self.dim_random_subset), index_col=0)
            testSetNew_Y = pandas.read_csv('testSetNew_Y{}-{}.csv'.format(self.repeatSometimes, self.dim_random_subset), index_col=0)

        # predictions
        self.MLP.predict(testSetNew_X)
        print("Score of method (repetitions={}, subset={}): {}%".format(
            self.repeatSometimes, self.dim_random_subset,
            self.MLP.score(testSetNew_X, testSetNew_Y) * 100))

    ########################## Helper functions ####################
    def writeCSV(self):
        print("DEB WriteCSV")
        self.trainSet.to_csv('trainSet{}-{}.csv'.format(self.repeatSometimes, self.dim_random_subset))
        self.testSet.to_csv('testSet{}-{}.csv'.format(self.repeatSometimes, self.dim_random_subset))
        self.TSnew_X.to_csv('TSnew_X{}-{}.csv'.format(self.repeatSometimes, self.dim_random_subset))
        self.TSnew_Y.to_csv('TSnew_Y{}-{}.csv'.format(self.repeatSometimes, self.dim_random_subset))

    def readCSV(self):
        print("DEB ReadCSV")
        self.trainSet = pandas.read_csv('trainSet{}-{}.csv'.format(self.repeatSometimes, self.dim_random_subset), index_col=0)
        self.testSet = pandas.read_csv('testSet{}-{}.csv'.format(self.repeatSometimes, self.dim_random_subset), index_col=0)
        self.TSnew_X = pandas.read_csv('TSnew_X{}-{}.csv'.format(self.repeatSometimes, self.dim_random_subset), index_col=0)
        self.TSnew_Y = pandas.read_csv('TSnew_Y{}-{}.csv'.format(self.repeatSometimes, self.dim_random_subset), index_col=0)
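# Hedged usage sketch (not from the original source): the class above drives
# itself from __init__, so constructing it runs the whole pipeline. It assumes
# header.txt, dataset.txt, and enumerable_columns.txt exist in the working
# directory, as the readDataset/preprocess methods require.
if __name__ == '__main__':
    nn = NeuralNetwork(repeatSometimes=2, dim_random_subset=2)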
mlpr = MLPRegressor(max_iter=5000, shuffle=False, tol=0.00005,
                    momentum=0.9, verbose=False)
mlpr_model = mlpr.fit(x_train, y_train)
print(f"Training iteration : {mlpr_model.n_iter_}")

#%% [markdown]
# ## Testing phase & result
# * Show MSE score of train phase
# * Show MSE, R2 and variance score of test phase

#%%
mlpr_predict = mlpr.predict(x_test)
mse = mean_squared_error(y_test, mlpr_predict)
r2 = r2_score(y_test, mlpr_predict)
evs = explained_variance_score(y_test, mlpr_predict)
print(f"MSE train : {mlpr_model.loss_}")
print(f"MSE test : {mse}")
print(f"R2 score : {r2}")
print(f"Variance score : {evs}")

#%% [markdown]
# ## Training Loss Curve

#%%
plt.style.use('seaborn')
from datetime import datetime
startTime = datetime.now()

fileTrain = open("fingerDataTrain.dat", 'r')
fileVal = open("fingerDataVal.dat", 'r')
trainingSet = np.loadtxt(fileTrain)
valSet = np.loadtxt(fileVal)
fileTrain.close()
fileVal.close()

trainX = trainingSet[:, :13]
trainY = trainingSet[:, 14:]
valX = valSet[:, :13]
valY = valSet[:, 14:]

# standardize each input feature using the training-set statistics
for i in range(trainX.shape[1]):
    m = trainX[:, i].mean()
    s = trainX[:, i].std()
    trainX[:, i] = (trainX[:, i] - m) / s
    valX[:, i] = (valX[:, i] - m) / s

ann = MLPRegressor()
ann.fit(trainX, trainY)
sqError = ((ann.predict(valX) - valY)**2).mean()

plt.scatter(valX[:, 1], valY[:, 3], color='black')
plt.plot(valX[:, 1], ann.predict(valX)[:, 3], color='blue', linewidth=3)

print(datetime.now() - startTime)
def MLPRegressorr(data1, y):
    X_train, X_test, y_train, y_test = train_test_split(data1, y,
                                                        test_size=0.2,
                                                        random_state=Hcurstate)
    X_train_new = X_train.reset_index(drop=True)
    y_train_new = y_train.reset_index(drop=True)
    X_train_new = X_train_new.values
    y_train_new = y_train_new.values

    k = 5
    kf = KFold(n_splits=k, random_state=Hcurstate)
    avg_train_acc, avg_test_acc = 0, 0

    avgsc_lst, avgsc_train_lst, avgsc_hld_lst = [], [], []
    avgsc, avgsc_train, avgsc_hld = 0, 0, 0

    for train_index, test_index in kf.split(X_train_new):
        X_train_cur, X_test_cur = X_train_new[train_index], X_train_new[test_index]
        y_train_cur, y_test_cur = y_train_new[train_index], y_train_new[test_index]
        X_train_train, X_val, y_train_train, y_val = train_test_split(
            X_train_cur, y_train_cur, test_size=0.25, random_state=Hcurstate)
        print(X_train_train.shape)
        print(X_val.shape)

        bestPerformingModel = MLPRegressor(hidden_layer_sizes=(100, 100),
                                           max_iter=300,
                                           random_state=Hcurstate)
        # fit on the current fold's training split so each fold is evaluated
        # on data the model has not seen
        bestPerformingModel = bestPerformingModel.fit(X_train_cur, y_train_cur)
        print(bestPerformingModel.n_layers_)

        y_pred = bestPerformingModel.predict(X_train_cur)
        bscr_train = sqrt(mean_squared_error(y_pred, y_train_cur))

        y_pred = bestPerformingModel.predict(X_test_cur)
        bscr = sqrt(mean_squared_error(y_pred, y_test_cur))

        y_pred = bestPerformingModel.predict(X_test)
        bscr_hld = sqrt(mean_squared_error(y_pred, y_test))

        avgsc_train_lst.append(bscr_train)
        avgsc_lst.append(bscr)
        avgsc_hld_lst.append(bscr_hld)

        avgsc_train = avgsc_train + bscr_train
        avgsc = avgsc + bscr
        avgsc_hld = avgsc_hld + bscr_hld

        print(bscr_train)
        print(bscr)
        print(bscr_hld)

    print('5-fold Train, Validation, and Test loss:')
    print(avgsc_train_lst)
    print(avgsc_lst)
    print(avgsc_hld_lst)

    print('Avg Train, Validation, and Test loss:')
    print(avgsc_train / k)
    print(avgsc / k)
    print(avgsc_hld / k)

    return avgsc_train_lst, avgsc_lst, avgsc_hld_lst
def train_evaluate(job):
    '''
    Train MLP Regressor models for COF and intercept for the parameters given
    in the job statepoints; evaluate using R^2, root mean squared error, and
    mean absolute error.
    '''
    for target in TARGETS:
        # read training data
        with open(root_dir + '/csv-files/{}_training_4.csv'.format(target)) as f:
            train = pd.read_csv(f, index_col=0)
        # read testing data
        with open(root_dir + '/csv-files/{}_testing.csv'.format(target)) as f:
            test = pd.read_csv(f, index_col=0)

        # Reduce the number of features by running data through dimensionality reduction
        features_all = list(train.drop([target] + IDENTIFIERS, axis=1))
        train_red = dimensionality_reduction(train, features_all,
                                             filter_missing=True, filter_var=True,
                                             filter_corr=True, missing_threshold=0.4,
                                             var_threshold=0.02, corr_threshold=0.9)
        features = list(train_red.drop([target] + IDENTIFIERS, axis=1))

        # split train and test data into features (X) and target (y)
        X_train, y_train = train[features], train[target]
        X_test, y_test = test[features], test[target]

        # normalize input features
        scaler = MinMaxScaler()
        X_train_scaled = scaler.fit_transform(X_train)
        X_test_scaled = scaler.transform(X_test)

        # train multi-layer perceptron neural network
        hidden_layers = [job.sp.num_perceptrons] * job.sp.num_layers
        MLP = MLPRegressor(hidden_layer_sizes=hidden_layers,
                           alpha=job.sp.alpha,
                           random_state=43,
                           tol=1e-6,
                           max_iter=1000)
        MLP.fit(X_train_scaled, y_train)

        # score the model on train and test data using RMSE, MAE, R^2
        # and store the scores in the job document
        r2_test = MLP.score(X_test_scaled, y_test)
        r2_train = MLP.score(X_train_scaled, y_train)
        job.doc['{}_r2_test'.format(target)] = r2_test
        job.doc['{}_r2_train'.format(target)] = r2_train

        y_test_pred = MLP.predict(X_test_scaled)
        y_train_pred = MLP.predict(X_train_scaled)

        rmse_test = mean_squared_error(y_test, y_test_pred, squared=False)
        rmse_train = mean_squared_error(y_train, y_train_pred, squared=False)
        job.doc['{}_rmse_test'.format(target)] = rmse_test
        job.doc['{}_rmse_train'.format(target)] = rmse_train

        mae_test = mean_absolute_error(y_test, y_test_pred)
        mae_train = mean_absolute_error(y_train, y_train_pred)
        job.doc['{}_mae_test'.format(target)] = mae_test
        job.doc['{}_mae_train'.format(target)] = mae_train

        # add features to a json file in the job workspace
        with open(job.fn('{}_features.json'.format(target)), 'w') as f:
            json.dump(features, f)

        # pickle out the model and scaler
        with open(job.fn('{}_trained.pickle'.format(target)), 'wb') as f:
            pickle.dump(MLP, f)
        with open(job.fn('{}_scaler.pickle'.format(target)), 'wb') as f:
            pickle.dump(scaler, f)

    # copy the job directory to the external hard drive
    job_dir_path = pathlib.Path(root_dir + '/workspace/' + job.id)
    hard_drive_path = pathlib.Path('/mnt/d/neural-networks-with-signac/workspace/')
    process = Popen(['cp', '-r', job_dir_path, hard_drive_path],
                    stdout=PIPE, stderr=PIPE)
    stdout, stderr = process.communicate()

    # remove trained model pickle files from the job directory, since they are
    # backed up to the external hard drive and take up a lot of space
    for target in TARGETS:
        path_to_pickle = pathlib.Path(str(job_dir_path) + '/{}_trained.pickle'.format(target))
        process = Popen(['rm', path_to_pickle], stdout=PIPE, stderr=PIPE)
        stdout, stderr = process.communicate()
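# A hedged sketch (not part of the original source) of consuming the artifacts
# train_evaluate() writes. The scaler and feature list stay in the job
# workspace, while the trained-model pickle is moved to the external drive, so
# it is loaded from the backup path. `job`, `target`, and `new_data` are
# assumed to match a completed training run.
import json
import pickle

with open(job.fn('{}_features.json'.format(target))) as f:
    features = json.load(f)
with open(job.fn('{}_scaler.pickle'.format(target)), 'rb') as f:
    scaler = pickle.load(f)
backup = '/mnt/d/neural-networks-with-signac/workspace/{}/{}_trained.pickle'.format(job.id, target)
with open(backup, 'rb') as f:
    MLP = pickle.load(f)

predictions = MLP.predict(scaler.transform(new_data[features]))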
# print(X)
# print(y)

print('Begin Train')
m = MLPRegressor(verbose=True,
                 activation='logistic',
                 solver='adam',
                 early_stopping=False,
                 hidden_layer_sizes=(50))
m.fit(X_train, y_train)
print(m)
print('End Train')

# print(y[:2])
print(m.predict(X_train[:5, ]))
print(y_train[:5])
print()
print(m.predict(X_test[:5]))
print(y_test[:5])
print(m.score(X_train, y_train))
print(m.score(X_test, y_test))

# count predictions that get the sign of the target right
y_pred = m.predict(X_test)
correct = 0
total = 0
for i in range(0, y_test.size):
    total += 1
    if y_test[i] < 0 and y_pred[i] < 0:
        correct += 1
axes.set_title("Data: " + file) axes.set_ylabel('Normalized distant count') axes.set_xlabel('Distance ($\AA$)') axes.hist(y_train, 150, color='blue',normed=True, label='plot',linewidth=2,alpha=1.0) plt.show() """ # Fit model clf.fit(X_train, y_train) # Compute and print r^2 score print(clf.score(X_test, y_test)) # Store predicted energies Ecmp = clf.predict(X_test) Ecmp = gt.hatokcal * (Ecmp) Eact = gt.hatokcal * (y_test) # Compute RMSE in kcal/mol rmse = gt.calculaterootmeansqrerror(Ecmp, Eact) # End timer _t1e = tm.time() print("Computation complete. Time: " + "{:.4f}".format((_t1e - _t1b)) + "s") # Output model information print("RMSE: " + str(rmse)) # print(clf.coef_) # print(clf.intercept_)
# split into training and validation dataset
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y,
                                                    test_size=0.2,
                                                    random_state=1)

# train
mlpr_model = MLPRegressor()
mlpr_model.fit(X_train, y_train)

# In[ ]:

#%% 19 - 1
# start to predict
y_pred = mlpr_model.predict(X_test)

# In[ ]:

#%% 19 - 2
# transform
threshold = 0.5
y_pred2 = pd.DataFrame({'Predicted': y_pred})
y_pred2 = transform_predicted(y_pred2)
y_pred2.head(10)

# In[ ]:

#%% 19 - 3
y_test2 = pd.DataFrame({'Survived': y_test})
y_test2.head(10)
# Example with a Regressor using the scikit-learn library
# example for the XOR gate

from sklearn.neural_network import MLPRegressor

X = [[0., 0.], [0., 1.], [1., 0.], [1., 1.]]  # each one of the entries 00 01 10 11
y = [0, 1, 1, 0]  # outputs for each one of the entries

# check http://scikit-learn.org/dev/modules/generated/sklearn.neural_network.MLPRegressor.html#sklearn.neural_network.MLPRegressor
# for more details
reg = MLPRegressor(hidden_layer_sizes=(5,), activation='tanh',
                   solver='sgd', alpha=0.001,
                   learning_rate='constant', max_iter=10000,
                   random_state=None, verbose=False,
                   warm_start=False, momentum=0.8,
                   tol=10e-8, shuffle=False)

reg.fit(X, y)
outp = reg.predict([[0., 0.], [0., 1.], [1., 0.], [1., 1.]])

print('Results:')
print('0 0 0:', outp[0])
print('0 1 1:', outp[1])
print('1 0 1:', outp[2])
print('1 1 0:', outp[3])
print('Score:', reg.score(X, y))
KNN = KNeighborsRegressor()
knn_param_grid = {'n_neighbors': [3, 10]}
knn_grid = model_selection.GridSearchCV(KNN, knn_param_grid, cv=10, n_jobs=25,
                                        verbose=1, scoring='neg_mean_squared_error')
knn_grid.fit(X_train, y_train)
print(' Best Params:' + str(knn_grid.best_params_))

KNN = KNeighborsRegressor(n_neighbors=10)
KNN.fit(X_train, y_train)
y_predict_knn = KNN.predict(X_test)
mae_knn = (np.abs(y_predict_knn - y_test)).sum() / 9467
joblib.dump(KNN, 'KNN.model')
print(mae_knn)

# mlp
from sklearn.neural_network import MLPRegressor
MLP = MLPRegressor(hidden_layer_sizes=(300, 200, 200), max_iter=100,
                   activation='relu')
MLP.fit(X_train, y_train)
y_predict_MLP = MLP.predict(X_test)
mae_MLP = (np.abs(y_predict_MLP - y_test)).sum() / 9467
joblib.dump(MLP, 'MLP.model')
print(mae_MLP)

# xgb
import xgboost as xgb
x_regress = xgb.XGBRegressor(max_depth=20, n_estimators=5000)
x_regress_param_grid = {'max_depth': [5, 20]}
x_regress_grid = model_selection.GridSearchCV(x_regress, x_regress_param_grid,
                                              cv=10, n_jobs=25, verbose=1,
                                              scoring='neg_mean_squared_error')
x_regress.fit(X_train, y_train)
joblib.dump(x_regress, 'x_regress_grid.model')
y_predict_xgb = x_regress.predict(X_test)
mae_xgb = (np.abs(y_predict_xgb - y_test)).sum() / 9467

# model ensembling
# simple averaging