def train_cross_validate(train, label, custom_net=None,
                         training_mse_threshold=0.40,
                         testing_mse_threshold=0.60,
                         epoch_threshold=10, epochs=100, hidden_size=50):
    # Split into train and test sets. `split_at` and `model_file`
    # are module-level globals.
    x_train = train[0:split_at, :]
    y_train = label[0:split_at].reshape(-1, 1)
    x_test = train[split_at:, :]
    y_test = label[split_at:].reshape(-1, 1)

    # Shapes.
    input_size = x_train.shape[1]
    target_size = y_train.shape[1]

    # prepare training dataset
    ds = SDS(input_size, target_size)
    ds.setField('input', x_train)
    ds.setField('target', y_train)

    # prepare testing dataset
    ds_test = SDS(input_size, target_size)
    ds_test.setField('input', x_test)
    ds_test.setField('target', y_test)

    min_mse = 1000000

    # init and train
    if custom_net is None:
        net = buildNetwork(input_size, hidden_size, target_size,
                           bias=True, hiddenclass=TanhLayer)
    else:
        print "Picking up the custom network"
        net = custom_net

    trainer = RPropMinusTrainer(net, dataset=ds, verbose=True,
                                weightdecay=0.01, batchlearning=True)
    print "training for {} epochs...".format(epochs)

    for i in range(epochs):
        mse = trainer.train()
        print "training mse, epoch {}: {}".format(i + 1, mse)
        p = net.activateOnDataset(ds_test)
        mse = MSE(y_test, p)
        print "-- testing mse, epoch {}: {}".format(i + 1, mse)
        pickle.dump(net, open("current_run", 'wb'))
        if min_mse > mse:
            print "Current minimum found at ", i
            pickle.dump(net, open("current_min_epoch_" + model_file, 'wb'))
            min_mse = mse

    pickle.dump(net, open(model_file, 'wb'))
    return net
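# A minimal usage sketch for train_cross_validate(), assuming the PyBrain
# and numpy imports used above. `split_at` and `model_file` are the
# module-level globals the function reads; the file names are illustrative.
data = np.loadtxt('train.csv', delimiter=',')
split_at = int(0.8 * data.shape[0])   # 80/20 train/test split
model_file = 'cv_model.pkl'
net = train_cross_validate(data[:, 0:-1], data[:, -1], epochs=50)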
def test(self, arr):
    # load model bundle [net, std_scale], as serialized by train()
    net, std_scale = pickle.load(open(self.model_file, 'rb'))
    print 'Finish loading model'

    # Load test data
    x_test, y_test = load_data(arr)
    x_test_scaled = std_scale.transform(x_test)  # Normalize to standard normal
    y_test_dummy = np.zeros(y_test.shape)

    input_size = x_test_scaled.shape[1]
    target_size = y_test.shape[1]
    assert net.indim == input_size
    assert net.outdim == target_size

    # prepare dataset
    ds = SDS(input_size, target_size)
    ds.setField('input', x_test_scaled)
    ds.setField('target', y_test_dummy)

    # predict
    print 'Activating ds'
    p = net.activateOnDataset(ds)
    print 'debug'
    # ptest = preprocessing.StandardScaler().fit_transform(p)
    # p_scaled = std_scale.inverse_transform(ptest)  # Convert back to original scale

    dna = self.convert_to_dna(p)
    return dna
def fit(self, X, y):
    _, self.in_size = X.shape
    _, self.out_size = y.shape

    ds = SDS(self.in_size, self.out_size)
    ds.setField('input', X)
    ds.setField('target', y)

    self.net = buildNetwork(self.in_size, self.h_size, self.out_size, bias=True)
    trainer = BP(self.net, ds)
    print("start training ...")
    # mse = trainer.train()
    # trainer.trainUntilConvergence(verbose=True, maxEpochs=4)
    for n in xrange(self.epo):
        mse = trainer.train()
        rmse = sqrt(mse)
        print("RMSE = %8.3f epoch = %d" % (rmse, n))
    return self
def fit(self, X, y):
    # y arrives as a 1-D vector; reshape it into a column of targets
    y_train = np.array([[yn] for yn in y])
    _, self.in_size = X.shape
    _, self.out_size = y_train.shape

    ds = SDS(self.in_size, self.out_size)
    ds.setField('input', X)
    ds.setField('target', y_train)

    self.net = buildNetwork(self.in_size, self.h_size, self.out_size, bias=True)
    trainer = BP(self.net, ds)
    print("start training ...")
    for n in xrange(self.epo):
        mse = trainer.train()
        rmse = sqrt(mse)
        if self.verbose:
            print("RMSE = %8.3f epoch = %d" % (rmse, n))
    return self
def predict(X, net):
    # Test set.
    x_test = X[:, :]
    # you'll need labels. In case you don't have them...
    y_test_dummy = np.zeros((X.shape[0], 1))

    input_size = x_test.shape[1]
    target_size = y_test_dummy.shape[1]
    assert net.indim == input_size
    assert net.outdim == target_size

    # prepare dataset
    ds = SDS(input_size, target_size)
    ds.setField('input', x_test)
    ds.setField('target', y_test_dummy)

    p = net.activateOnDataset(ds)
    print p.shape
    np.savetxt("1_" + output_predictions_file, p, fmt='%.6f')
    s = pd.Series(p[:, 0])
    s.index += 1
    s.to_csv('neural_prediction_3.csv', header=['Prediction'],
             index=True, index_label='ID')
def validate(X, y, net):
    # Held-out test set; `split_at` is a module-level global.
    x_test = X[split_at:, :]
    y_test = y[split_at:].reshape(-1, 1)

    # you'll need labels. In case you don't have them...
    y_test_dummy = np.zeros(y_test.shape)

    input_size = x_test.shape[1]
    target_size = y_test.shape[1]
    assert net.indim == input_size
    assert net.outdim == target_size

    # prepare dataset
    ds = SDS(input_size, target_size)
    ds.setField('input', x_test)
    ds.setField('target', y_test)

    # predict
    p = net.activateOnDataset(ds)
    mse = MSE(y_test, p)
    print "testing MSE:", mse
    np.savetxt(output_predictions_file, p, fmt='%.6f')
def train_fn(trainfile, hiddennodes, output_model_file):
    hidden_size = hiddennodes

    print 'Loading data..'
    x_train, y_train = load_data(trainfile)
    input_size = x_train.shape[1]
    target_size = y_train.shape[1]

    # prepare dataset
    ds = SDS(input_size, target_size)
    ds.setField('input', x_train)
    ds.setField('target', y_train)

    # init and train
    net = buildNetwork(input_size, hidden_size, target_size, bias=True,
                       hiddenclass=SigmoidLayer, outclass=SigmoidLayer)
    trainer = BackpropTrainer(net, ds)

    print 'Training..'
    trainer.trainUntilConvergence(validationProportion=0.15,
                                  maxEpochs=1000, continueEpochs=10)
    print 'Finish training. Serializing model...'
    pickle.dump(net, open(output_model_file, 'wb'))
def predict(isGroup):
    path_test_file = '/home/rodolfo/Projetos/NeuralNetwork/data/test_groups_%s_file.csv' % isGroup
    path_neural_network = 'model_groups_%s.pkl' % isGroup

    test_file = path_test_file
    model_file = path_neural_network
    output_predictions_file = 'predictions_file.txt'

    # load model
    net = pickle.load(open(model_file, 'rb'))

    # load data
    test = np.loadtxt(test_file, delimiter=',')
    x_test = test[:, 0:-1]
    y_test = test[:, -1]
    y_test = y_test.reshape(-1, 1)

    # you'll need labels. In case you don't have them...
    y_test_dummy = np.zeros(y_test.shape)

    input_size = x_test.shape[1]
    target_size = y_test.shape[1]
    assert net.indim == input_size
    assert net.outdim == target_size

    # prepare dataset
    ds = SDS(input_size, target_size)
    ds.setField('input', x_test)
    ds.setField('target', y_test_dummy)

    # predict
    p = net.activateOnDataset(ds)
    np.savetxt(output_predictions_file, p, fmt='%.6f')
def main():
    train_file = 'data/train.csv'
    # validation_file = 'data/validation.csv'
    output_model_file = 'model.xml'
    # hidden_size = 4
    epochs = 500

    # load data
    train = np.loadtxt(train_file, delimiter=' ')
    Input = train[0:, 0:3]
    Output = train[0:, 3:5]
    # validation = np.loadtxt(validation_file, delimiter=',')
    # train = np.vstack((train, validation))

    # prepare dataset: SDS takes the input and target *dimensions*,
    # not the arrays themselves
    ds = SDS(Input.shape[1], Output.shape[1])
    # ds.setField('input', x_train)
    # ds.setField('target', y_train)

    # init and train
    # net = buildNetwork(input_size, hidden_size, target_size, bias=True)
    net = buildNetwork(3,  # input layer
                       4,  # hidden0
                       2,  # output
                       hiddenclass=SigmoidLayer,
                       outclass=SigmoidLayer,
                       bias=True)
    # NOTE: this immediately discards the freshly built network and
    # resumes training from the serialized one instead
    net = NetworkReader.readFrom('model.xml')

    for i, o in zip(Input, Output):
        ds.addSample(i, o)
        print i, o

    trainer = BackpropTrainer(net, ds)
    print "training for {} epochs...".format(epochs)
    for i in range(epochs):
        mse = trainer.train()
        rmse = sqrt(mse)
        print "training RMSE, epoch {}: {}".format(i + 1, rmse)
        if os.path.isfile("../stopfile.txt"):
            break

    NetworkWriter.writeToFile(net, output_model_file)
def train(train_select, validate_select, aggregate_ttrss):
    train = pd_to_numpy(train_select, aggregate_ttrss)
    validation = pd_to_numpy(validate_select, aggregate_ttrss)
    output_model_file = 'model.pkl'
    hidden_size = 20
    epochs = 10

    train = np.vstack((train, validation))
    x_train = train[:, 0:-1]
    y_train = train[:, -1]
    y_train = y_train.reshape(-1, 1)
    print(x_train, y_train)

    input_size = x_train.shape[1]
    target_size = y_train.shape[1]

    # prepare dataset
    ds = SDS(input_size, target_size)
    ds.setField('input', x_train)
    ds.setField('target', y_train)

    # init and train
    # fnn = FeedForwardNetwork()
    net = buildNetwork(input_size, hidden_size, target_size, bias=True)
    # net = NNregression(ds)
    trainer = BackpropTrainer(net, ds, verbose=True, weightdecay=0.01)

    print("training for {} epochs...".format(epochs))
    print(input_size, target_size, x_train, y_train)
    # plt.axis([0, epochs, 0, 0.03])
    # plt.xlabel('epoch')
    # plt.ylabel('error')
    # plt.ion()
    for i in range(epochs):
        mse = trainer.train()
        rmse = sqrt(mse)
        # plt.scatter(i, rmse, s=5)
        # plt.pause(0.00001)
        print("training RMSE, epoch {}: {}".format(i + 1, rmse))

    pickle.dump(net, open(output_model_file, 'wb'))
    return net
def predict_proba(self, X):
    row_size, in_size = X.shape
    y_test_dummy = np.zeros([row_size, self.out_size])
    assert self.net.indim == in_size

    ds = SDS(in_size, self.out_size)
    ds.setField('input', X)
    ds.setField('target', y_test_dummy)

    p = self.net.activateOnDataset(ds)
    return p
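# A hypothetical usage sketch for the fit()/predict_proba() pair above,
# assuming they belong to one sklearn-style wrapper class (called NNWrapper
# here purely for illustration) whose constructor sets h_size and epo.
X_train = np.random.rand(100, 8)
y_train = np.random.rand(100)          # 1-D; fit() reshapes it to a column
clf = NNWrapper(h_size=50, epo=20)
clf.fit(X_train, y_train)
proba = clf.predict_proba(np.random.rand(10, 8))  # raw network activations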
def train_fn(trainfile, hiddennodes):
    output_model_file = '../Serialized/model_{0}_nodes.pkl'.format(str(hiddennodes))
    hidden_size = hiddennodes

    print 'Loading data..'
    x_train, y_train = load_data(trainfile)
    input_size = x_train.shape[1]
    target_size = y_train.shape[1]

    # prepare dataset
    ds = SDS(input_size, target_size)
    ds.setField('input', x_train)
    ds.setField('target', y_train)

    # init and train
    net = buildNetwork(input_size, hidden_size, target_size, bias=True,
                       hiddenclass=SigmoidLayer, outclass=SigmoidLayer)
    trainer = BackpropTrainer(net, ds)

    # print "training for {} epochs...".format(epochs)
    # for i in range(epochs):
    #     mse = trainer.train()
    #     rmse = sqrt(mse)
    #     print "training RMSE, epoch {}: {}".format(i + 1, rmse)

    print 'Training..'
    trainer.trainUntilConvergence(validationProportion=0.15,
                                  maxEpochs=1000, continueEpochs=10)
    print 'Finish training. Serializing model...'
    pickle.dump(net, open(output_model_file, 'wb'))
def validate(train_select, validate_select):
    train = pd_to_numpy(train_select)
    validation = pd_to_numpy(validate_select)
    output_model_file = 'model_val.pkl'
    hidden_size = 100
    epochs = train.shape[0]
    continue_epochs = 100
    validation_proportion = 0.15

    # load data, join train and validation sets
    # train = np.loadtxt( train_file, delimiter = ',' )
    # validation = np.loadtxt( validation_file, delimiter = ',' )
    train = np.vstack((train, validation))
    x_train = train[:, 0:-1]
    y_train = train[:, -1]
    y_train = y_train.reshape(-1, 1)

    input_size = x_train.shape[1]
    target_size = y_train.shape[1]

    # prepare dataset
    ds = SDS(input_size, target_size)
    ds.setField('input', x_train)
    ds.setField('target', y_train)

    # init and train
    net = buildNetwork(input_size, hidden_size, target_size, bias=True)
    trainer = BackpropTrainer(net, ds)
    train_mse, validation_mse = trainer.trainUntilConvergence(
        verbose=True, validationProportion=validation_proportion,
        maxEpochs=epochs, continueEpochs=continue_epochs)

    pickle.dump(net, open(output_model_file, 'wb'))
def prepareDataset():
    train_file = "../traindata/train_scaled.csv"
    train = np.loadtxt(train_file, delimiter=',')
    x_train = train[:, 0:-1]
    y_train = train[:, -1]
    y_train = y_train.reshape(-1, 1)

    input_size = x_train.shape[1]
    target_size = y_train.shape[1]
    print input_size
    print target_size

    # prepare dataset
    ds = SDS(input_size, target_size)
    ds.setField('input', x_train)
    ds.setField('target', y_train)
    return (ds, input_size)
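# A short sketch of how prepareDataset() might feed a training run, assuming
# the PyBrain imports used elsewhere in this file (buildNetwork,
# BackpropTrainer); the hidden size and epoch cap are illustrative, and the
# output size of 1 matches the single target column in train_scaled.csv.
ds, input_size = prepareDataset()
net = buildNetwork(input_size, 50, 1, bias=True)
trainer = BackpropTrainer(net, ds)
trainer.trainUntilConvergence(validationProportion=0.15, maxEpochs=100)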
def predict(aggregate_quotes, aggregate_ttrss):
    # test_file = 'data/test.csv'
    model_file = 'model.pkl'
    output_predictions_file = 'predictions.txt'

    # load model
    net = pickle.load(open(model_file, 'rb'))

    # load data
    test = pd_to_numpy(aggregate_quotes, aggregate_ttrss)
    x_test = test[:, 0:-1]
    y_test = test[:, -1]
    y_test = y_test.reshape(-1, 1)
    # you'll need labels. In case you don't have them...
    # y_test_dummy = np.zeros(y_test.shape)
    print(x_test, y_test)

    input_size = x_test.shape[1]
    target_size = y_test.shape[1]
    print(net.indim, net.outdim, input_size, target_size)
    assert net.indim == input_size
    assert net.outdim == target_size

    # prepare dataset
    ds = SDS(input_size, target_size)
    ds.setField('input', x_test)
    ds.setField('target', y_test)

    # predict
    p = net.activateOnDataset(ds)
    mse = MSE(y_test, p)
    rmse = sqrt(mse)
    print("testing RMSE:", rmse, p)
    np.savetxt(output_predictions_file, p, fmt='%.6f')
    return p
def train(self, arr):
    '''
    Train the NN for the given data.
    :param arr: [wt_arr, mt_arr], in ATCG or atcg
    :return: void, but serializes the model bundle to file
    '''
    x_train, y_train = load_data(arr)
    std_scale = preprocessing.StandardScaler().fit(x_train)
    x_train_scaled = std_scale.transform(x_train)  # Normalize to standard normal
    # y_train_scaled = std_scale.transform(y_train)  # Try not scaling y

    input_size = x_train_scaled.shape[1]
    target_size = y_train.shape[1]

    # prepare dataset
    ds = SDS(input_size, target_size)
    ds.setField('input', x_train_scaled)
    ds.setField('target', y_train)

    # init and train
    net = buildNetwork(input_size, self.hiddennodes, target_size, bias=True,
                       hiddenclass=TanhLayer, outclass=TanhLayer)
    trainer = BackpropTrainer(net, ds)
    print 'Training..'
    trainer.trainUntilConvergence(validationProportion=0.15,
                                  maxEpochs=1000, continueEpochs=10)
    print 'Finish training. Serializing bundle...'
    bundle = [net, std_scale]
    pickle.dump(bundle, open(self.model_file, 'wb'))
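# A hypothetical round trip for the train()/test() pair in this file,
# assuming both are methods of one class (named DNAModel here purely for
# illustration) whose constructor sets self.model_file and self.hiddennodes;
# wt_arr and mt_arr stand in for real sequence arrays.
model = DNAModel(model_file='dna_model.pkl', hiddennodes=30)
model.train([wt_arr, mt_arr])        # fits scaler + net, pickles [net, std_scale]
dna = model.test([wt_arr, mt_arr])   # reloads the bundle and decodes predictions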
def neuralNetworkRegression(X_test):
    """
    :param X_test: data consisting of features (excluding the class variable)
    :return: predictions from the saved neural network regression model
    """
    print "NEURAL NETWORK REGRESSION"
    print "Executing..."
    print
    print "Loading saved model..."
    net = pickle.load(open("Models/neural.sav", 'rb'))
    # utils.neuralNetworkRegression()

    # predict new values
    y_test = np.zeros((X_test.shape[0], 1))
    input_size = X_test.shape[1]
    target_size = y_test.shape[1]

    ds = SDS(input_size, target_size)
    ds.setField('input', X_test)
    ds.setField('target', y_test)

    prediction = net.activateOnDataset(ds)
    print prediction
    return prediction
def test_fn(testfile, hiddennodes, model_file):
    # load model
    net = pickle.load(open(model_file, 'rb'))
    print 'Finish loading model'

    # Load test data
    x_test, y_test = load_data(testfile)
    y_test_dummy = np.zeros(y_test.shape)

    input_size = x_test.shape[1]
    target_size = y_test.shape[1]
    assert net.indim == input_size
    assert net.outdim == target_size

    # prepare dataset
    ds = SDS(input_size, target_size)
    ds.setField('input', x_test)
    ds.setField('target', y_test_dummy)

    # predict
    print 'Activating ds'
    p = net.activateOnDataset(ds)

    def threshold(x):
        if x > 0.5:
            print 'x>0.5'
        return 0 if x < 0.5 else 1

    p_converted = []
    for each in p:
        converted = map(threshold, each)
        p_converted.append(converted)
    p_converted = np.array(p_converted)

    acc = accuracy_score(y_test, p_converted)
    print 'Accuracy score=%s' % acc
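# An equivalent, vectorized way to binarize the activations in test_fn()
# above, replacing the per-element threshold()/map() loop with numpy --
# a sketch, assuming p holds the raw network outputs:
p_converted = (p >= 0.5).astype(int)
acc = accuracy_score(y_test, p_converted)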
# pybrain is picky about data input format, so reformatting here;
# also preprocessing
inputArrays = dict()
yArrays = dict()
for i in called:
    inputArrays[i] = data[called[i]].values
    inputArrays[i] = preprocessing.scale(inputArrays[i])
    yArrays[i] = data[i].values
    yArrays[i] = yArrays[i].reshape(-1, 1)

input_size = dict()
target_size = dict()
ds = dict()
for i in inputArrays:
    input_size[i] = inputArrays[i].shape[1]
    target_size[i] = yArrays[i].shape[1]
    ds[i] = SDS(input_size[i], target_size[i])
    ds[i].setField('input', inputArrays[i])
    ds[i].setField('target', yArrays[i])

# doing the same for the test/validate set
testinputArrays = dict()
testyArrays = dict()
for i in called:
    testinputArrays[i] = test[called[i]].values
    testinputArrays[i] = preprocessing.scale(testinputArrays[i])
    testyArrays[i] = test[i].values
    testyArrays[i] = testyArrays[i].reshape(-1, 1)

testinput_size = dict()
testtarget_size = dict()
testds = dict()
for i in testinputArrays:
    testinput_size[i] = testinputArrays[i].shape[1]
# print str(redes[rede][y][x]) + ",",
# print str(x) + ", " + str(y)
# for i in range(len(arr)):
#     print arr[i]

y_test = []
y_test_x_int = []
y_test_y_int = []
ds = SDS(len(redes), 2)
for input, target in arr:
    y_test.append(target)
    y_test_y_int.append(target[1] * 22)
    y_test_x_int.append(target[0] * 35)
    ds.addSample(input, target)

net = pickle.load(open('model.pkl', 'rb'))
# net = buildNetwork(12, 16, 12, bias=True, hiddenclass=TanhLayer)

'''
# Genetic algorithm
def FitNeuralNetworkDept(dept):
    train_file = input_file_path + train_file_name[0] + str(dept) + train_file_name[1]
    test_file = input_file_path + test_file_name[0] + str(dept) + test_file_name[1]

    train = np.loadtxt(train_file, delimiter=' ')
    test = np.loadtxt(test_file, delimiter=' ')

    x_train = train[:, 0:-1]
    y_train = train[:, -1]

    # min-max scale the targets to [0, 1]
    y_max = max(y_train)
    y_min = min(y_train)
    y_train = (y_train - y_min) / (y_max - y_min)
    y_train = y_train.reshape(-1, 1)

    input_size = x_train.shape[1]
    target_size = y_train.shape[1]

    x_test = test[:, 0:-1]
    y_test = test[:, -1]
    y_test = y_test.reshape(-1, 1)

    ds_test = SDS(input_size, target_size)
    ds_test.setField('input', x_test)
    ds_test.setField('target', y_test)

    ds = SDS(input_size, target_size)
    ds.setField('input', x_train)
    ds.setField('target', y_train)

    hidden_size = input_size * hidden_size_ratio

    # Set the parameter online = True to do online learning!
    n = getModel(dept=dept, hidden_size=hidden_size, input_size=input_size,
                 target_size=target_size, online=OnlineLearningMode)
    # print n

    trainer = BackpropTrainer(n, ds, weightdecay=weightdecay,
                              learningrate=learningrate, lrdecay=1.0,
                              momentum=momentum)
    train_mse, validation_mse = trainer.trainUntilConvergence(
        verbose=False, maxEpochs=epochs,
        validationProportion=cv_ratio, continueEpochs=5)

    file_name = output_file_path + 'nn_dept' + str(dept) + '_epoch' + str(epochs)
    model_file = open(file_name + '_model', 'wb')  # binary mode for pickle
    pickle.dump(n, model_file)
    model_file.close()
    print 'dept' + str(dept) + ' complete..!'

    model_info = open(file_name + '_info.txt', 'w')
    model_info.write('model for dept' + str(dept) + '\n\n')
    model_info.write(str(n) + '\n\n')
    model_info.write("input size: " + str(input_size) + '\n')
    model_info.write("hidden size: " + str(hidden_size) + '\n')
    model_info.write("hidden layer number: " + str(num_hidden_layer + 1) + '\n')
    model_info.write("target size: " + str(target_size) + '\n\n')
    model_info.write("learningrate: " + str(learningrate) + '\n')
    model_info.write("momentum: " + str(momentum) + '\n')
    model_info.write("weightdecay: " + str(weightdecay) + '\n\n')
    model_info.write("epochs: " + str(epochs) + '\n')
    model_info.write("cv_ratio: " + str(cv_ratio) + '\n\n')
    model_info.write("y_min: " + str(y_min) + '\n')
    model_info.write("y_max: " + str(y_max) + '\n\n')
    model_info.write("train_mse: " + str(train_mse) + '\n\n')
    model_info.write("validation_mse: " + str(validation_mse))
    model_info.close()

    # reload the model to check that this dept was serialized correctly
    n = None
    fileObject = open(file_name + '_model', 'rb')
    n = pickle.load(fileObject)
    fileObject.close()

    p_train = n.activateOnDataset(ds)
    p_test = n.activateOnDataset(ds_test)

    # undo the min-max scaling before writing out predictions
    plot_result = np.vstack((p_train * (y_max - y_min) + y_min,
                             p_test * (y_max - y_min) + y_min))
    p_total_print = plot_result.reshape(-1, len(plot_result))
    p_test_print = p_test.reshape(-1, len(p_test))
    p_test_print = p_test_print * (y_max - y_min) + y_min

    w_file = open(output_file_path + 'walmart_sales_dept' + str(dept) + '_test_result.csv', 'wb')
    for row in p_test_print:
        for element in row:
            w_file.write(str(element) + '\n')
        break
    w_file.close()

    w_file = open(output_file_path + 'walmart_sales_dept' + str(dept) + '_train_test_result.csv', 'wb')
    for row in p_total_print:
        for element in row:
            w_file.write(str(element) + '\n')
        break
    w_file.close()

    PlotResult(y_train=y_train, plot_result=plot_result,
               y_max=y_max, y_min=y_min, dept=dept)
    return n
test = np.loadtxt(test_file, delimiter=',')
x_test = test[:, 0:-1]
y_test = test[:, -1]
y_test = y_test.reshape(-1, 1)

# you'll need labels. In case you don't have them...
y_test_dummy = np.zeros(y_test.shape)

input_size = x_test.shape[1]
target_size = y_test.shape[1]
assert net.indim == input_size
assert net.outdim == target_size

# prepare dataset
ds = SDS(input_size, target_size)
ds.setField('input', x_test)
ds.setField('target', y_test_dummy)

# predict
p = net.activateOnDataset(ds)
mse = MSE(y_test, p)
rmse = sqrt(mse)
print "testing RMSE:", rmse
np.savetxt(output_predictions_file, p, fmt='%.6f')
preprocessing = fp.feature_preprocessing()
preprocessing.full_preprocess(keep_all=True)
loaded_data = preprocessing.data[:10]
# drop() is not in-place, so keep the returned frame
loaded_data = loaded_data.drop(
    ['DATE', 'ASS_ID', 'YEAR_DAY_AND_YEAR', 'DAY_DS', 'MONTH'], axis=1)
print(preprocessing.data.columns)

train = np.asarray(loaded_data)
x_train = train[:, 0:-1]
y_train = train[:, -1]
y_train = y_train.reshape(-1, 1)

input_size = x_train.shape[1]
target_size = y_train.shape[1]
hidden_size = 100
epochs = 600

ds = SDS(input_size, target_size)
ds.setField('input', x_train)
ds.setField('target', y_train)

net = buildNetwork(input_size, hidden_size, target_size, bias=True)
trainer = BackpropTrainer(net, ds)

print "training for {} epochs...".format(epochs)
for i in range(epochs):
    mse = trainer.train()
    rmse = sqrt(mse)
    print "training RMSE, epoch {}: {}".format(i + 1, rmse)
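# This script trains but never saves the network; a minimal follow-up,
# mirroring the pickle pattern used by the other snippets in this file
# (the filename 'model.pkl' is illustrative, and pickle must be imported):
pickle.dump(net, open('model.pkl', 'wb'))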