def trainNNs(X, T, trainFraction, hiddenLayerStructures, numberRepetitions, numberIterations, classify=False):
    """
    Trains neural networks repeatedly.

    NOTE(review): the definition visible here stops right after the network
    object is constructed; no training call, error bookkeeping or ``return``
    is present in this view, so the documented return value below is the
    author's stated intent, not something the visible code produces.

    :param X: Data to partition and train
    :param T: Target values
    :param trainFraction: What fraction of the data should be used for training;
        the remainder (``1 - trainFraction``) is passed as the test fraction
    :param hiddenLayerStructures: Iterable of hidden layer structures; one
        network is built per structure and repetition
    :param numberRepetitions: Number of times to partition and build a network
        for each structure
    :param numberIterations: Iterations within Neural Network (not used by the
        visible part of the body)
    :param classify: Classification (True) or Regression (False)
    :return: Intended: list containing the hidden layer structure, the
        training error and testing error, and the elapsed time.
    """
    import numpy as np
    import neuralnetworks as nn
    import time
    import mlutils as ml
    results = []
    # Module-level list that is reset on every call.
    global resultErrors
    resultErrors = []
    # debugging
    verbose = True
    for structure in hiddenLayerStructures:
        trainList = []
        testList = []
        # Start of the timing window for this structure.
        t0 = time.time()
        for i in range(numberRepetitions):
            # Fresh train/test partition for every repetition.
            Xtrain, Ttrain, Xtest, Ttest = ml.partition(X, T, (trainFraction, 1 - trainFraction), classification=classify)
            if classify:
                # One output per distinct target value found in T.
                nnet = nn.NeuralNetworkClassifier(X.shape[1],structure,len(np.unique(T)))
            else:
                # One output per target column.
                nnet = nn.NeuralNetwork(X.shape[1],structure,T.shape[1])
def trainNNs(X, T, trainFraction, hiddenLayerStructures, numberRepetitions, numberIterations, classify=False):
    """Train one network per hidden-layer structure, several times each.

    :param X: input samples; ``X.shape[1]`` is the number of network inputs
    :param T: target values
    :param trainFraction: fraction of the data used for training; the rest is
        used for testing
    :param hiddenLayerStructures: iterable of hidden-layer structures
    :param numberRepetitions: number of partition/train/evaluate rounds per
        structure
    :param numberIterations: training iterations handed to ``nnet.train``
    :param classify: when true, train a classifier and record the fraction of
        samples whose prediction equals the target; otherwise train a
        regression network and record the RMSE
    :return: list of ``[structure, trainScores, testScores, elapsedSeconds]``,
        one entry per structure
    """
    import neuralnetworks as nn
    import mlutils as ml
    import numpy as np
    import time

    collected = []
    for structure in hiddenLayerStructures:
        train_scores, test_scores = [], []
        began = time.time()
        for _ in range(numberRepetitions):
            Xtrain, Ttrain, Xtest, Ttest = ml.partition(
                X, T, (trainFraction, 1-trainFraction), classify)
            # Evaluate the training split first, then the test split.
            splits = ((Xtrain, Ttrain, train_scores), (Xtest, Ttest, test_scores))
            if not classify:
                nnet = nn.NeuralNetwork(X.shape[1], structure, T.shape[1])
                nnet.train(Xtrain, Ttrain, numberIterations)
                for inputs, targets, scores in splits:
                    residual = nnet.use(inputs) - targets
                    scores.append(np.sqrt(np.mean((residual**2))))
            else:
                nnet = nn.NeuralNetworkClassifier(X.shape[1], structure, len(np.unique(T)))
                nnet.train(Xtrain, Ttrain, numberIterations, errorPrecision=1.e-8)
                for inputs, targets, scores in splits:
                    matches = np.sum(nnet.use(inputs) == targets)
                    scores.append(matches/len(targets))
        collected.append([structure, train_scores, test_scores, time.time() - began])
    return collected
def performanceC(X, T, trainFraction, hidden, numberRepetitions, numberIterations):
    """Measure classification error of one network structure over repeated splits.

    Each repetition draws a new train/test partition, trains a fresh
    ``nn.NeuralNetworkClassifier`` on the training part only, and records the
    fraction of samples whose prediction differs from the target.

    :param X: input samples; ``X.shape[1]`` is the number of network inputs
    :param T: class labels; the number of distinct values sets the output count
    :param trainFraction: fraction of the data used for training
    :param hidden: hidden-layer structure passed to the classifier
    :param numberRepetitions: number of partition/train/evaluate rounds
    :param numberIterations: training iterations per round
    :return: tuple ``(trainErrors, testErrors)`` of per-repetition error fractions
    """
    train_errors, test_errors = [], []
    for _ in range(numberRepetitions):
        Xtrain, Ttrain, Xtest, Ttest = ml.partition(
            X, T, (trainFraction, 1 - trainFraction), classification=True)

        classifier = nn.NeuralNetworkClassifier(X.shape[1], hidden, len(np.unique(T)))
        classifier.train(Xtrain, Ttrain, numberIterations)

        predicted_train = classifier.use(Xtrain)
        predicted_test = classifier.use(Xtest)

        # Error = misclassified samples divided by the number of samples.
        train_errors.append(np.sum(predicted_train != Ttrain) / Ttrain.shape[0])
        test_errors.append(np.sum(predicted_test != Ttest) / Ttest.shape[0])
    return train_errors, test_errors
def performance(X, T, trainFraction, hidden, numberRepetitions, numberIterations):
    """Measure regression RMSE of one network structure over repeated splits.

    Each repetition draws a new train/test partition, trains a fresh
    ``nn.NeuralNetwork`` on the training part only, and records the root mean
    squared error on both parts.

    :param X: input samples; ``X.shape[1]`` is the number of network inputs
    :param T: target values; ``T.shape[1]`` is the number of network outputs
    :param trainFraction: fraction of the data used for training
    :param hidden: hidden-layer structure passed to the network
    :param numberRepetitions: number of partition/train/evaluate rounds
    :param numberIterations: training iterations per round
    :return: tuple ``(trainRMSEs, testRMSEs)`` of per-repetition values
    """
    train_scores, test_scores = [], []
    for _ in range(numberRepetitions):
        Xtrain, Ttrain, Xtest, Ttest = ml.partition(
            X, T, (trainFraction, 1 - trainFraction), classification=False)

        regressor = nn.NeuralNetwork(X.shape[1], hidden, T.shape[1])
        regressor.train(Xtrain, Ttrain, numberIterations)

        train_residual = regressor.use(Xtrain) - Ttrain
        test_residual = regressor.use(Xtest) - Ttest

        train_scores.append(np.sqrt(np.mean(train_residual**2)))
        test_scores.append(np.sqrt(np.mean(test_residual**2)))
    return train_scores, test_scores
def trainNNs(X, T, trainFraction, hiddenLayerStructures, numberRepetitions, numberIterations, classify=False):
    """Train every hidden-layer structure repeatedly and collect its errors.

    Prints each structure as it is started and ``done`` at the end.

    :param X: input samples
    :param T: target values (class labels when ``classify`` is true)
    :param trainFraction: fraction of the data used for training
    :param hiddenLayerStructures: iterable of hidden-layer structures
    :param numberRepetitions: number of partition/train/evaluate rounds per
        structure
    :param numberIterations: value passed to ``nnet.train`` as ``nIterations``
    :param classify: when true, record the fraction of misclassified samples;
        otherwise record the RMSE
    :return: list of ``[structure, trainErrors, testErrors, elapsedSeconds]``,
        one entry per structure
    """
    results = []
    for structure in hiddenLayerStructures:
        print(structure, end=" ")
        began = time.time()  # timing window covers all repetitions of this structure
        train_errors = []
        test_errors = []
        for _ in range(numberRepetitions):
            Xtrain, Ttrain, Xtest, Ttest = ml.partition(
                X, T, (trainFraction, 1 - trainFraction), classification=classify)
            if classify:
                # Output count comes from the distinct labels of the training split.
                nnet = nn.NeuralNetworkClassifier(
                    Xtrain.shape[1], structure, np.unique(Ttrain).size)
                nnet.train(Xtrain, Ttrain, nIterations=numberIterations)
                predicted_train = nnet.use(Xtrain)
                predicted_test = nnet.use(Xtest)
                train_accuracy = np.sum(predicted_train == Ttrain) / len(Ttrain)
                test_accuracy = np.sum(predicted_test == Ttest) / len(Ttest)
                train_errors.append(1 - train_accuracy)
                test_errors.append(1 - test_accuracy)
            else:
                nnet = nn.NeuralNetwork(Xtrain.shape[1], structure, Ttrain.shape[1])
                nnet.train(Xtrain, Ttrain, nIterations=numberIterations)
                train_residual = nnet.use(Xtrain) - Ttrain
                test_residual = nnet.use(Xtest) - Ttest
                train_errors.append(np.sqrt(np.mean(train_residual**2)))
                test_errors.append(np.sqrt(np.mean(test_residual**2)))
        results.append([structure, train_errors, test_errors, time.time() - began])
    print("done")
    return results
def trainNNs(X, T, trainFraction, hiddenLayerStructures, numberRepetitions, numberIterations, classify=False):
    """Train each hidden-layer structure repeatedly and record train/test error.

    For every structure the data is re-partitioned ``numberRepetitions`` times;
    a new network is trained on the training part and evaluated on both parts.

    :param X: input samples; ``X.shape[1]`` is used as the number of inputs
    :param T: target values; ``T.shape[1]`` is the number of outputs for
        regression, the number of distinct values the number of classes for
        classification
    :param trainFraction: fraction of the data used for training; the
        remainder is used for testing
    :param hiddenLayerStructures: iterable of hidden-layer structures
    :param numberRepetitions: number of partition/train/evaluate rounds per
        structure
    :param numberIterations: value passed to ``nnet.train`` as ``nIterations``
    :param classify: when true, train a classifier and record the fraction of
        misclassified samples; otherwise record the RMSE
    :return: list of ``[structure, trainErrors, testErrors, elapsedSeconds]``,
        one entry per structure
    """
    result = []
    for net in hiddenLayerStructures:
        trainPerformance = []
        testPerformance = []
        start_time = time.time()

        for _ in range(numberRepetitions):
            # Fractions are passed as a (train, test) tuple.
            Xtrain, Ttrain, Xtest, Ttest = ml.partition(
                X, T, (trainFraction, 1 - trainFraction), classification=classify)

            # Size the network from the data instead of assuming one input
            # column and one output column.
            if classify:
                nnet = nn.NeuralNetworkClassifier(X.shape[1], net, len(np.unique(T)))
            else:
                nnet = nn.NeuralNetwork(X.shape[1], net, T.shape[1])

            nnet.train(Xtrain, Ttrain, nIterations=numberIterations)
            trainOut = nnet.use(Xtrain)
            testOut = nnet.use(Xtest)

            if classify:
                # Fraction of samples classified incorrectly.
                trainScore = np.sum(trainOut != Ttrain) / Ttrain.shape[0]
                testScore = np.sum(testOut != Ttest) / Ttest.shape[0]
            else:
                # RMSE of the predictions against the targets (not the inputs).
                trainScore = np.sqrt(np.mean((trainOut - Ttrain)**2))
                testScore = np.sqrt(np.mean((testOut - Ttest)**2))

            trainPerformance.append(trainScore)
            testPerformance.append(testScore)

        elapsed = time.time() - start_time
        result.append([net, trainPerformance, testPerformance, elapsed])
    return result
def trainNNs(X, T, trainFraction, hiddenLayerStructures, numberRepetitions, numberIterations, classify):
    """Train each hidden-layer structure repeatedly and record train/test error.

    :param X: input samples; ``X.shape[1]`` is used as the number of inputs
    :param T: target values; ``T.shape[1]`` is the number of outputs for
        regression, the number of distinct values the number of classes for
        classification
    :param trainFraction: fraction of the data used for training; the
        remainder is used for testing
    :param hiddenLayerStructures: iterable of hidden-layer structures
    :param numberRepetitions: number of partition/train/evaluate rounds per
        structure
    :param numberIterations: training iterations passed to ``nnet.train``
    :param classify: when true, train a classifier and record the fraction of
        misclassified samples; otherwise record the RMSE
    :return: list of ``[structure, trainErrors, testErrors, elapsedSeconds]``,
        one entry per structure
    """
    results = []
    for h_layer in hiddenLayerStructures:
        start = time.time()
        train_errors = []
        test_errors = []
        for _ in range(numberRepetitions):
            Xtrain, Ttrain, Xtest, Ttest = ml.partition(
                X, T, (trainFraction, 1 - trainFraction), classification=classify)
            if classify:
                # The classifier needs one output per class, not per target column.
                nnet = nn.NeuralNetworkClassifier(X.shape[1], h_layer, len(np.unique(T)))
                nnet.train(Xtrain, Ttrain, numberIterations)
                # Record the misclassified fraction so that both branches
                # store an error where lower is better.
                train_errors.append(np.sum(nnet.use(Xtrain) != Ttrain) / Ttrain.shape[0])
                test_errors.append(np.sum(nnet.use(Xtest) != Ttest) / Ttest.shape[0])
            else:
                nnet = nn.NeuralNetwork(X.shape[1], h_layer, T.shape[1])
                nnet.train(Xtrain, Ttrain, numberIterations)
                Ytrain = nnet.use(Xtrain)
                Ytest = nnet.use(Xtest)
                train_errors.append(np.sqrt(np.mean((Ytrain - Ttrain)**2)))
                test_errors.append(np.sqrt(np.mean((Ytest - Ttest)**2)))
        # Measured after the loop so it is defined even with zero repetitions.
        total_time = time.time() - start
        results.append([h_layer, train_errors, test_errors, total_time])
    return results
# NOTE(review): the next two statements are the tail of a definition whose
# header lies outside this view; they are kept unchanged. The indentation
# shown (append inside a loop, return at function level) is an assumption.
        result.append([structure, trainedResult, testResult, time.time() - t0])
    return result


def summarize(results):
    """Collapse per-repetition scores into their means.

    :param results: iterable of 4-item entries; items 1 and 2 are averaged
        with ``np.mean``, items 0 and 3 are copied through unchanged
    :return: list of ``[entry[0], mean(entry[1]), mean(entry[2]), entry[3]]``
    """
    import numpy as np
    summaryResults = []
    for result in results:
        summaryResults.append([result[0], np.mean(result[1]), np.mean(result[2]), result[3]])
    return summaryResults


def bestNetwork(summary):
    """Return the entry of ``summary`` whose item at index 2 is smallest.

    ``min`` raises ``ValueError`` when ``summary`` is empty.
    """
    best = min(summary, key=lambda l: l[2])
    return best


# Load the data set and add an integer code column derived from the "sign" column.
data = pd.read_csv("templates/data1Normed.csv")
names = list(data)
data["signcode"] = data["sign"].astype('category').cat.codes
data = data.values
# Columns 0..62 are taken as inputs and column 64 as the target.
# NOTE(review): column 63 is skipped; presumably it holds the original "sign"
# value and column 64 the added "signcode" -- confirm against the CSV layout.
Xhands = data[:, 0:63]
Xhands = Xhands.astype(np.float64)
Tsign = data[:, 64:65]
Tsign = Tsign.astype(np.int32)
# Train a classifier with the structure in bestNet on an 80/20 split and
# predict the test part.
# NOTE(review): bestNet is not defined anywhere in this view.
Xtrain,Ttrain,Xtest,Ttest = ml.partition(Xhands,Tsign,(0.8, 0.2),True)
nnet = nn.NeuralNetworkClassifier(Xtrain.shape[1], bestNet, len(np.unique(Ttrain)))
nnet.train(Xtrain, Ttrain, 200)
result = nnet.use(Xtest)
def run_train(rank, size, mode):
    """Train a network on the aggregate match stats and keep it if it clearly beats the saved one.

    Reads five CSV parts through the HDFS client, keeps the rows of the
    requested match mode, trains a network, compares its test RMSE with the
    network stored on disk for that mode, and overwrites the stored network
    (plus an error-curve image) when the new RMSE is less than half the old.

    :param rank: not used by this function body
    :param size: not used by this function body
    :param mode: 1 selects rows with ``match_mode == 1`` (files tagged "FPP"),
        2 selects rows with ``match_mode == 2`` (files tagged "TPP")
    :raises ValueError: if ``mode`` is neither 1 nor 2
    """
    # Reject unsupported modes before any I/O; previously this surfaced later
    # as a NameError on X.
    mode_labels = {1: 'FPP', 2: 'TPP'}
    if mode not in mode_labels:
        raise ValueError(f'mode must be 1 (FPP) or 2 (TPP), got {mode!r}')
    label = mode_labels[mode]

    client = InsecureClient('http://juneau:46731', user='******')  # HDFS Web UI port!!

    # Read all parts, then concatenate once (DataFrame.append no longer
    # exists in pandas >= 2.0).
    # For a local run without HDFS, read 'agg_match_stats_0.csv' directly with
    # the same usecols/replace arguments.
    frames = []
    for part in range(5):
        with client.read(f"/pubg/aggregate/agg_match_stats_{part}.csv") as f:
            frames.append(
                pd.read_csv(f, usecols=[1, 3, 4, 9, 12]).replace(
                    to_replace={'tpp': 2, 'fpp': 1}, value=None))
    df = pd.concat(frames, ignore_index=True)

    print(f'Shape of data read: {df.shape}')
    df = df[df['player_survive_time'] < 2500]  # removing outlier survival times

    selected = df[df['match_mode'] == mode]
    # NOTE(review): X keeps every column except match_mode while T is column
    # index 4 of the same frame, so unless column 4 is match_mode the target
    # column is also part of the inputs. Left as is because changing it alters
    # the network input width and the saved state dicts -- confirm intent.
    X = selected.drop(columns=['match_mode']).values.astype('double')
    T = selected.iloc[:, 4:].values.astype('double').reshape(-1, 1)

    frac = 0.8
    X_train, X_test, T_train, T_test = ml.partition(X, T, frac)
    # partition_dataset is defined elsewhere; presumably it selects this
    # worker's share of the training rows -- verify against its definition.
    train = partition_dataset(np.concatenate((X_train, T_train), axis=1))
    X_train, T_train = train[:, :4], train[:, 4:]

    network = [5]
    relu = True
    n_iterations = 500
    batch_size = 67000
    learn_rate = 10**-5

    Qnet = nn.NN_distributed(X_train.shape[1], network, T_train.shape[1], relu)
    net, err = Qnet.train_pytorch(X_train,
                                  T_train,
                                  n_iterations,
                                  batch_size,
                                  learn_rate,
                                  verbose=True)
    print(
        f'Final Train RMSE error: {err[-1].detach().cpu().numpy()}, training time: {net.time}'
    )

    Y_test = net.use_pytorch(X_test)  # predictions
    RMSE_net = np.sqrt(np.mean((Y_test - T_test)**2))  # errors = predictions - targets
    print(f'Test RMSE: {RMSE_net}')
    print(
        f'Sample Target: {T_test[0][0]}, Predicted Value: {net.use_pytorch(X_test[0])[0]}'
    )  # sample prediction

    # Evaluate the stored network for this mode on the same test data.
    model = nn.NN_distributed(X_train.shape[1], network, T_train.shape[1], relu)
    model.load_state_dict(torch.load(f'Best network ({label}).pth'))
    Y_test_best = model.use_pytorch(X_test)
    RMSE_model = np.sqrt(np.mean((Y_test_best - T_test)**2))
    print(f'Best network Test RMSE: {RMSE_model}')

    # Replace the stored network only when the new test RMSE is below half of
    # the stored network's test RMSE.
    if RMSE_net < RMSE_model / 2:
        n_epochs = len(err)
        plt.figure(figsize=(12, 12))
        plt.plot(list(range(1, n_epochs + 1)), err)
        plt.xlim(1 - 0.05 * n_epochs, n_epochs * 1.05)
        plt.xlabel('Epochs')
        plt.ylabel('Train RMSE')
        torch.save(net.state_dict(), f'Best network ({label}).pth')
        plt.savefig(f'Error rate - best network ({label}).png')
        print('Saving as new best network')