def createAndTrainNetworkFromFile(curs_filename, count_input_samples,
                                  count_samples, net_filename,
                                  count_layers=33, count_outputs=1,
                                  max_epochs=15000, min_epochs=300):
    net = buildNetwork(count_input_samples, count_layers, count_outputs)
    ds = SupervisedDataSet(count_input_samples, count_outputs)
    wb = load_workbook(filename=curs_filename)
    ws = wb.active
    for i in range(0, count_samples):
        loaded_data = []
        # Each row holds count_input_samples inputs followed by one target.
        for j in range(0, count_input_samples + 1):
            loaded_data.append(
                round(float(ws.cell(row=i + 1, column=j + 1).value), 4))
        ds.addSample(loaded_data[:-1], loaded_data[-1])
    trainer = RPropMinusTrainer(net, verbose=True)
    trainer.setData(ds)
    a = trainer.trainUntilConvergence(maxEpochs=max_epochs,
                                      continueEpochs=min_epochs,
                                      validationProportion=0.15)
    # Embed the final training error in the saved network's filename.
    net_filename = net_filename[:-4] + str(a[0][-1]) + '.xml'
    NetworkWriter.writeToFile(net, net_filename)
    result_list = [a, net_filename]
    return result_list
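# The function above assumes these imports (the NetworkWriter path follows
# PyBrain's customxml package):
from openpyxl import load_workbook
from pybrain.datasets import SupervisedDataSet
from pybrain.supervised.trainers import RPropMinusTrainer
from pybrain.tools.customxml.networkwriter import NetworkWriter
from pybrain.tools.shortcuts import buildNetwork

# A minimal call sketch; the spreadsheet name, sample counts, and output
# filename are hypothetical placeholders, not values from the original project.
errors, saved_name = createAndTrainNetworkFromFile(
    'rates.xlsx', count_input_samples=10, count_samples=100,
    net_filename='rates_net.xml')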
def train(self, epoch):
    self.ds._convertToOneOfMany()
    trainer = RPropMinusTrainer(self.net, dataset=self.ds, momentum=0.1,
                                verbose=True, weightdecay=0.01)
    trainer.trainEpochs(epoch)
def train_cross_validate(train, label, custom_net=None,
                         training_mse_threshold=0.40,
                         testing_mse_threshold=0.60,
                         epoch_threshold=10, epochs=100, hidden_size=50):
    # Test set. 'split_at' and 'model_file' are module-level globals in the
    # original project; SDS is an alias for SupervisedDataSet.
    x_train = train[0:split_at, :]
    y_train_slice = label.__getslice__(0, split_at)
    y_train = y_train_slice.reshape(-1, 1)
    x_test = train[split_at:, :]
    y_test_slice = label.__getslice__(split_at, label.shape[0])
    y_test = y_test_slice.reshape(-1, 1)

    # Shape.
    input_size = x_train.shape[1]
    target_size = y_train.shape[1]
    input_size_test = x_test.shape[1]
    target_size_test = y_test.shape[1]

    # Prepare the training dataset.
    ds = SDS(input_size, target_size)
    ds.setField('input', x_train)
    ds.setField('target', y_train)

    # Prepare the test dataset.
    ds_test = SDS(input_size, target_size)
    ds_test.setField('input', x_test)
    ds_test.setField('target', y_test)

    min_mse = 1000000

    # Init and train.
    if custom_net is None:
        net = buildNetwork(input_size, hidden_size, target_size, bias=True,
                           hiddenclass=TanhLayer)
    else:
        print "Picking up the custom network"
        net = custom_net

    trainer = RPropMinusTrainer(net, dataset=ds, verbose=True,
                                weightdecay=0.01, batchlearning=True)
    print "training for {} epochs...".format(epochs)

    for i in range(epochs):
        mse = trainer.train()
        print "training mse, epoch {}: {}".format(i + 1, mse)

        p = net.activateOnDataset(ds_test)
        mse = MSE(y_test, p)
        print "-- testing mse, epoch {}: {}".format(i + 1, mse)
        pickle.dump(net, open("current_run", 'wb'))

        if min_mse > mse:
            print "Current minimum found at ", i
            pickle.dump(net, open("current_min_epoch_" + model_file, 'wb'))
            min_mse = mse

    pickle.dump(net, open(model_file, 'wb'))
    return net
def recurrent_neural_network(self, train_X, train_y, test_X, test_y,
                             n_hidden_neurons=50, iterations=100,
                             gridsearch=False, gridsearch_training_frac=0.7,
                             outputbias=False, error='accuracy'):
    """
    Apply a recurrent neural network for classification upon the training
    data (with the specified number of hidden neurons and iterations), and
    use the created network to predict the outcome for both the test and
    training set. It returns the categorical predictions for the training
    and test set as well as the probabilities associated with each class,
    each class being represented as a column in the data frame.
    """
    if gridsearch:
        n_hidden_neurons, iterations, outputbias = self. \
            gridsearch_recurrent_neural_network(
                train_X, train_y,
                gridsearch_training_frac=gridsearch_training_frac,
                error=error)

    # Create numerical datasets first.
    new_train_X, new_test_X = self.create_numerical_multiple_dataset(train_X, test_X)
    new_train_y, new_test_y = self.create_numerical_multiple_dataset(train_y, test_y)

    # Normalize the input.
    new_train_X, new_test_X, min_X, max_X = self.normalize(new_train_X, new_test_X, 0, 1)
    new_train_y, new_test_y, min_y, max_y = self.normalize(new_train_y, new_test_y, 0.1, 0.9)

    # Create the proper pybrain datasets.
    ds_training = self.rnn_dataset(new_train_X, new_train_y)
    ds_test = self.rnn_dataset(new_test_X, new_test_y)

    inputs = len(new_train_X.columns)
    outputs = len(new_train_y.columns)

    # Build the network with the proper parameters.
    n = buildNetwork(inputs, n_hidden_neurons, outputs,
                     hiddenclass=SigmoidLayer, outclass=SigmoidLayer,
                     outputbias=outputbias, recurrent=True)

    # Train using backpropagation through time.
    # trainer = BackpropTrainer(n, dataset=ds_training, verbose=False,
    #                           momentum=0.9, learningrate=0.01)
    trainer = RPropMinusTrainer(n, dataset=ds_training, verbose=False)

    for i in range(0, iterations):
        trainer.train()

    Y_train = []
    Y_test = []
    for sample, target in ds_training.getSequenceIterator(0):
        Y_train.append(n.activate(sample).tolist())
    for sample, target in ds_test.getSequenceIterator(0):
        Y_test.append(n.activate(sample).tolist())

    y_train_result = pd.DataFrame(Y_train, columns=new_train_y.columns,
                                  index=train_y.index)
    y_test_result = pd.DataFrame(Y_test, columns=new_test_y.columns,
                                 index=test_y.index)

    y_train_result = self.denormalize(y_train_result, min_y, max_y, 0.1, 0.9)
    y_test_result = self.denormalize(y_test_result, min_y, max_y, 0.1, 0.9)

    return (y_train_result.idxmax(axis=1), y_test_result.idxmax(axis=1),
            y_train_result, y_test_result)
def AddData(self, datainput, dataoutput):
    if len(dataoutput) != len(datainput):
        print("Input and output lengths differ:",
              len(dataoutput), len(datainput))
        return 1
    self.ds = SupervisedDataSet(self.inputsize, self.outputsize)
    for i in xrange(len(dataoutput)):
        self.ds.appendLinked(datainput[i], dataoutput[i])
    self.trainer = RPropMinusTrainer(self.net, dataset=self.ds,
                                     learningrate=0.1)
    return 0
def train(self, params, verbose=False):
    if params['reset_every_training']:
        if verbose:
            print 'create lstm network'

        random.seed(6)
        if params['output_encoding'] is None:
            self.net = buildNetwork(self.nDimInput, params['num_cells'],
                                    self.nDimOutput, hiddenclass=LSTMLayer,
                                    bias=True, outputbias=True,
                                    recurrent=True)
        elif params['output_encoding'] == 'likelihood':
            self.net = buildNetwork(self.nDimInput, params['num_cells'],
                                    self.nDimOutput, hiddenclass=LSTMLayer,
                                    bias=True, outclass=SigmoidLayer,
                                    recurrent=True)

    self.net.reset()

    ds = SequentialDataSet(self.nDimInput, self.nDimOutput)
    networkInput = self.window(self.networkInput, params)
    targetPrediction = self.window(self.targetPrediction, params)

    # Prepare a training dataset using the history.
    for i in xrange(len(networkInput)):
        ds.addSample(self.inputEncoder.encode(networkInput[i]),
                     self.outputEncoder.encode(targetPrediction[i]))

    if params['num_epochs'] > 1:
        trainer = RPropMinusTrainer(self.net, dataset=ds, verbose=verbose)

        if verbose:
            print " train LSTM on ", len(ds), " records for ", \
                params['num_epochs'], " epochs "

        if len(networkInput) > 1:
            trainer.trainEpochs(params['num_epochs'])
    else:
        self.trainer.setData(ds)
        self.trainer.train()

    # Run through the training dataset to get the LSTM network state right.
    self.net.reset()
    for i in xrange(len(networkInput)):
        self.net.activate(ds.getSample(i)[0])
def fit(self, X, y):
    """
    Trains the classifier

    :param pandas.DataFrame X: data shape [n_samples, n_features]
    :param y: labels of events - array-like of shape [n_samples]

    .. note:: doesn't support sample weights
    """
    dataset = self._prepare_net_and_dataset(X, y, 'classification')

    if self.use_rprop:
        trainer = RPropMinusTrainer(self.net,
                                    etaminus=self.etaminus,
                                    etaplus=self.etaplus,
                                    deltamin=self.deltamin,
                                    deltamax=self.deltamax,
                                    delta0=self.delta0,
                                    dataset=dataset,
                                    learningrate=self.learningrate,
                                    lrdecay=self.lrdecay,
                                    momentum=self.momentum,
                                    verbose=self.verbose,
                                    batchlearning=self.batchlearning,
                                    weightdecay=self.weightdecay)
    else:
        trainer = BackpropTrainer(self.net,
                                  dataset,
                                  learningrate=self.learningrate,
                                  lrdecay=self.lrdecay,
                                  momentum=self.momentum,
                                  verbose=self.verbose,
                                  batchlearning=self.batchlearning,
                                  weightdecay=self.weightdecay)

    if self.epochs < 0:
        trainer.trainUntilConvergence(
            maxEpochs=self.max_epochs,
            continueEpochs=self.continue_epochs,
            verbose=self.verbose,
            validationProportion=self.validation_proportion)
    else:
        for i in range(self.epochs):
            trainer.train()
    self.__fitted = True

    return self
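# A standalone sketch of the rprop branch above, spelling out PyBrain's
# documented default hyperparameters for RPropMinusTrainer (etaminus=0.5,
# etaplus=1.2, delta0=0.1, deltamin=1e-6, deltamax=5.0); the XOR data and
# network shape are illustrative only.
from pybrain.datasets import SupervisedDataSet
from pybrain.supervised.trainers import RPropMinusTrainer
from pybrain.tools.shortcuts import buildNetwork

xor_ds = SupervisedDataSet(2, 1)
for sample, target in [((0, 0), (0,)), ((0, 1), (1,)),
                       ((1, 0), (1,)), ((1, 1), (0,))]:
    xor_ds.addSample(sample, target)

xor_net = buildNetwork(2, 4, 1, bias=True)
xor_trainer = RPropMinusTrainer(xor_net, dataset=xor_ds,
                                etaminus=0.5,     # step-size shrink factor
                                etaplus=1.2,      # step-size growth factor
                                delta0=0.1,       # initial step size
                                deltamin=1.0e-6,  # smallest allowed step
                                deltamax=5.0)     # largest allowed step
for _ in range(100):
    xor_trainer.train()  # returns the epoch's average error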
def trainNetwork(net, sample_list, validate_list, net_filename,
                 max_epochs=5500, min_epochs=300):
    count_input_samples = len(sample_list)
    count_outputs = len(validate_list)
    # Note: the dataset holds a single (sample_list, validate_list) pair.
    ds = SupervisedDataSet(count_input_samples, count_outputs)
    ds.addSample(sample_list, validate_list)
    trainer = RPropMinusTrainer(net, verbose=True)
    trainer.setData(ds)
    trainer.trainUntilConvergence(maxEpochs=max_epochs,
                                  continueEpochs=min_epochs)
    NetworkWriter.writeToFile(net, net_filename)
    return net
def partial_fit(self, X, y):
    """
    Additional training of the estimator

    :param pandas.DataFrame X: data shape [n_samples, n_features]
    :param y: labels of events - array-like of shape [n_samples]

    :return: self
    """
    dataset = self._prepare_dataset(X, y, self._model_type)
    if not self.is_fitted():
        self._prepare_net(dataset=dataset, model_type=self._model_type)

    if self.use_rprop:
        trainer = RPropMinusTrainer(self.net,
                                    etaminus=self.etaminus,
                                    etaplus=self.etaplus,
                                    deltamin=self.deltamin,
                                    deltamax=self.deltamax,
                                    delta0=self.delta0,
                                    dataset=dataset,
                                    learningrate=self.learningrate,
                                    lrdecay=self.lrdecay,
                                    momentum=self.momentum,
                                    verbose=self.verbose,
                                    batchlearning=self.batchlearning,
                                    weightdecay=self.weightdecay)
    else:
        trainer = BackpropTrainer(self.net,
                                  dataset,
                                  learningrate=self.learningrate,
                                  lrdecay=self.lrdecay,
                                  momentum=self.momentum,
                                  verbose=self.verbose,
                                  batchlearning=self.batchlearning,
                                  weightdecay=self.weightdecay)

    if self.epochs < 0:
        trainer.trainUntilConvergence(
            maxEpochs=self.max_epochs,
            continueEpochs=self.continue_epochs,
            verbose=self.verbose,
            validationProportion=self.validation_proportion)
    else:
        trainer.trainEpochs(epochs=self.epochs)

    return self
def AddDataSequential(self, data):
    self.ds = SequentialDataSet(self.inputsize, self.outputsize)

    # For each element, link every earlier element to it as an
    # (input, target) pair, one sequence per target element.
    for i in xrange(len(data) - 1, 0, -1):
        t = data[i]
        k = i - 1
        while k > -1:
            self.ds.appendLinked(data[k], t)
            k -= 1
        self.ds.newSequence()

    """print self.ds.getNumSequences()
    for i in range(self.ds.getNumSequences()):
        for input, target in self.ds.getSequenceIterator(i):
            print i, TransToIntList_45(input), TransToIntList_45(target)"""

    self.trainer = RPropMinusTrainer(self.net, dataset=self.ds,
                                     learningrate=0.1)
    return 0
def createAndTrainNetworkFromList(train_list, count_input_samples,
                                  net_filename, count_layers=33,
                                  count_outputs=1, max_epochs=15000,
                                  min_epochs=300):
    net = buildNetwork(count_input_samples, count_layers, count_outputs)
    ds = SupervisedDataSet(count_input_samples, count_outputs)
    count_samples = len(train_list)
    for i in range(0, count_samples):
        # Note: with count_outputs > 1 this picks a single element; a slice
        # (train_list[i][-count_outputs:]) would be needed for several outputs.
        ds.addSample(train_list[i][:-count_outputs],
                     train_list[i][-count_outputs])
    trainer = RPropMinusTrainer(net, verbose=True)
    trainer.setData(ds)
    a = trainer.trainUntilConvergence(maxEpochs=max_epochs,
                                      continueEpochs=min_epochs,
                                      validationProportion=0.15)
    # Embed the final training error in the saved network's filename.
    net_filename = net_filename[:-4] + str(a[0][-1]) + '.xml'
    NetworkWriter.writeToFile(net, net_filename)
    result_list = [a, net_filename]
    return result_list
def train(self, input_row, output_row):
    """
    Trains the network with resilient propagation (R-prop).

    PARTITION_OF_EDUCATION_VERIFICATION_SET - training/validation split ratio
    MAX_EPOCHS - maximum number of training epochs
    OUTCASTING_EPOCHS - number of epochs without improvement after which
    training stops (the escape hatch for local minima)
    """
    self._form_set(input_row, output_row)
    trainer = RPropMinusTrainer(module=self.network, dataset=self.data_set)
    self.training_errors, self.validation_errors = trainer.trainUntilConvergence(
        validationProportion=self.settings.training_part_fraction,
        maxEpochs=self.settings.maximum_training_epochs,
        continueEpochs=self.settings.quit_epochs)

    len_validate = int(len(output_row[0]['data']) *
                       (1 - self.settings.training_part_fraction))
    results_of = [list(self.network.activate(x))[0]
                  for x in self.inputs_for_validation[len_validate:]]
    # Note: despite the name, this computes the mean absolute error.
    self.mse = sum(map(lambda result, target: fabs(result - target),
                       list(results_of),
                       list(output_row[0]['data'][len_validate:]))) / len(results_of)

    print 'DUMB-dd'
    for it in results_of:
        print it
    print 'DUMB-pp'
    for it in list(output_row[0]['data'][len_validate:]):
        print it
    print '| | |-MSE = ', self.mse
# One output neuron per class.
training_dataset._convertToOneOfMany(bounds=[0, 1])

# Same for the independent test dataset.
testing_dataset = generate_data(test=True)
testing_dataset._convertToOneOfMany(bounds=[0, 1])

# Build a feed-forward network with two hidden layers of 15 units each, plus
# a corresponding trainer.
fnn = buildNetwork(training_dataset.indim, 15, 15, training_dataset.outdim,
                   outclass=SoftmaxLayer)
# trainer = BackpropTrainer(fnn, dataset=training_dataset, verbose=True)
trainer = RPropMinusTrainer(fnn, dataset=training_dataset, verbose=True)

for i in range(500):
    # Train the network for 15 epochs at a time.
    trainer.trainEpochs(15)

    # Evaluate the result on the training and test data.
    trnresult = percentError(trainer.testOnClassData(),
                             training_dataset['class'])
    tstresult = percentError(trainer.testOnClassData(dataset=testing_dataset),
                             testing_dataset['class'])

    # Print the result.
    print "epoch: %4d" % trainer.totalepochs, \
        " train error: %5.2f%%" % trnresult, \
        " test error: %5.2f%%" % tstresult
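# The snippet above relies on a generate_data() helper that is not shown.
# A minimal sketch of what it might look like, assuming a ClassificationDataSet
# built from a feature matrix X and integer labels y; load_features() is a
# hypothetical loader, not part of the original code.
from pybrain.datasets import ClassificationDataSet

def generate_data(test=False):
    X, y = load_features(test=test)  # hypothetical loader
    ds = ClassificationDataSet(X.shape[1], 1, nb_classes=len(set(y)))
    for sample, label in zip(X, y):
        ds.addSample(sample, [label])
    return ds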
def recurrent_neural_network(self, train_X, train_y, test_X, test_y,
                             n_hidden_neurons=50, iterations=100,
                             gridsearch=False, gridsearch_training_frac=0.7,
                             outputbias=False, error='accuracy'):
    if gridsearch:
        n_hidden_neurons, iterations, outputbias = \
            self.gridsearch_recurrent_neural_network(
                train_X, train_y, test_X, test_y,
                gridsearch_training_frac=gridsearch_training_frac,
                error=error)

    # Create numerical datasets first.
    new_train_X, new_test_X = self.create_numerical_multiple_dataset(
        train_X, test_X)
    new_train_y, new_test_y = self.create_numerical_multiple_dataset(
        train_y, test_y)

    # Normalize the input.
    new_train_X, new_test_X, min_X, max_X = self.normalize(
        new_train_X, new_test_X, 0, 1)
    new_train_y, new_test_y, min_y, max_y = self.normalize(
        new_train_y, new_test_y, 0.1, 0.9)

    # Create the proper pybrain datasets.
    ds_training = self.rnn_dataset(new_train_X, new_train_y)
    ds_test = self.rnn_dataset(new_test_X, new_test_y)

    inputs = len(new_train_X.columns)
    outputs = len(new_train_y.columns)

    # Build the network with the proper parameters.
    n = buildNetwork(inputs, n_hidden_neurons, outputs,
                     hiddenclass=SigmoidLayer, outclass=SigmoidLayer,
                     outputbias=outputbias, recurrent=True)

    # Train using backpropagation through time.
    # trainer = BackpropTrainer(n, dataset=ds_training, verbose=False,
    #                           momentum=0.9, learningrate=0.01)
    trainer = RPropMinusTrainer(n, dataset=ds_training, verbose=False)
    for i in range(0, iterations):
        trainer.train()

    # Determine performance on the training and test set.
    Y_train = []
    Y_test = []
    for sample, target in ds_training.getSequenceIterator(0):
        Y_train.append(n.activate(sample).tolist())
    for sample, target in ds_test.getSequenceIterator(0):
        Y_test.append(n.activate(sample).tolist())

    y_train_result = pd.DataFrame(Y_train, columns=new_train_y.columns,
                                  index=train_y.index)
    y_test_result = pd.DataFrame(Y_test, columns=new_test_y.columns,
                                 index=test_y.index)

    y_train_result = self.denormalize(y_train_result, min_y, max_y, 0.1, 0.9)
    y_test_result = self.denormalize(y_test_result, min_y, max_y, 0.1, 0.9)

    return (y_train_result.idxmax(axis=1), y_test_result.idxmax(axis=1),
            y_train_result, y_test_result)
def get_new_trainer(self, data_set):
    if not self.neural_net:
        self.build_neural_net()
    return RPropMinusTrainer(self.neural_net, dataset=data_set)
# Build our recurrent network with 10 hidden neurodes, one recurrent
# connection, using tanh activation functions.
net = RecurrentNetwork()
hidden_neurodes = 10
net.addInputModule(LinearLayer(len(train_set["input"][0]), name="in"))
net.addModule(TanhLayer(hidden_neurodes, name="hidden1"))
net.addOutputModule(LinearLayer(len(train_set["target"][0]), name="out"))
net.addConnection(FullConnection(net["in"], net["hidden1"], name="c1"))
net.addConnection(FullConnection(net["hidden1"], net["out"], name="c2"))
net.addRecurrentConnection(
    FullConnection(net["out"], net["hidden1"], name="cout"))
net.sortModules()
net.randomize()

# Train for 30 epochs (overkill) using the rprop- training algorithm.
trainer = RPropMinusTrainer(net, dataset=train_set, verbose=True)
trainer.trainOnDataset(train_set, 30)

# Test on training set.
predictions_train = np.array(
    [net.activate(train_set["input"][i])[0] for i in xrange(len(train_set))])
plt.plot(train_set["target"], c="k")
plt.plot(predictions_train, c="r")
plt.show()

# And on test set.
predictions_test = np.array(
    [net.activate(test_set["input"][i])[0] for i in xrange(len(test_set))])
plt.plot(test_set["target"], c="k")
plt.plot(predictions_test, c="r")
plt.show()
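# Once trained, a network like the one above can be saved and restored with
# PyBrain's XML helpers; 'rnn.xml' is a hypothetical filename.
from pybrain.tools.customxml.networkwriter import NetworkWriter
from pybrain.tools.customxml.networkreader import NetworkReader

NetworkWriter.writeToFile(net, 'rnn.xml')
restored_net = NetworkReader.readFrom('rnn.xml')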
            nr.append(ratio)
            print ratio, column
        else:
            print column, "not an int or long"
    return np.array(nr[:-1]), nr[-1]


data = cursor.execute("select %s from adult_data" % columns).fetchall()

dataset = SupervisedDataSet(8, 1)
for row in data:
    xd, yd = createNPRow(row)
    dataset.addSample(xd, yd)

nn = buildNetwork(8, 3, 1)
trainer = RPropMinusTrainer(nn)
trainer.setData(dataset)
for x in range(5):
    error = trainer.train()
    print error

errors, success = 0, 0
for row in cursor.execute("select %s from adult_test" % columns).fetchall():
    xd, yd = createNPRow(row)
    check = int(round(nn.activate(xd[:8])[0]))
    if check > 1:
        check = 1
    prediction = possibilities['relation_to_50k_plus'][check]
    actual = possibilities['relation_to_50k_plus'][yd]
    if prediction == actual:
        match = "match"
def train_nn():
    vta = MLFMFCCOnlineAlignedArray(usec0=False, n_last_frames=n_last_frames)
    # vta.append_mlf(mlf_sil)
    # vta.append_trn(train_data_sil)
    vta.append_mlf(mlf_speech)
    vta.append_trn(train_data_speech)

    mfcc = vta.__iter__().next()
    print "MFCC length:", len(mfcc[0])
    input_size = len(mfcc[0])

    if sigmoid:
        net = buildNetwork(input_size, n_hidden_units, n_hidden_units,
                           n_hidden_units, n_hidden_units, 2,
                           hiddenclass=SigmoidLayer, outclass=SoftmaxLayer,
                           bias=True, fast=arac)
    else:
        net = buildNetwork(input_size, n_hidden_units, n_hidden_units,
                           n_hidden_units, n_hidden_units, 2,
                           hiddenclass=TanhLayer, outclass=SoftmaxLayer,
                           bias=True, fast=arac)

    dc_acc = deque(maxlen=20)
    dt_acc = deque(maxlen=20)

    print "Generating the MFCC features"
    vta_new = []
    i = 0
    for frame, label in vta:
        if i % (n_max_frames / 10) == 0:
            print "Already processed: %.2f%% of data" % (100.0 * i / n_max_frames)
        if i > n_max_frames:
            break
        i += 1
        vta_new.append((frame, label))
    vta = vta_new

    for epoch in range(n_max_epoch):
        i = 1
        m = 0

        ds = SupervisedDataSet(input_size, 2)

        c_acc = 0.0
        c_sil = 0.0
        t_acc = 0.0
        t_sil = 0.0

        for frame, label in vta:
            if (i % n_max_frames_per_minibatch) != 0:
                # Accumulate the current mini-batch.
                if label == "sil":
                    ds.addSample(frame, (1, 0))
                else:
                    ds.addSample(frame, (0, 1))
            else:
                # Mini-batch boundary: evaluate, then train or cross-validate.
                a = net.activateOnDataset(ds)
                acc, sil = get_accuracy(ds, a)

                print
                print "-" * 120
                if m < n_crossvalid_minibatches:
                    print "Cross-validation"
                    c_acc = running_avg(c_acc, m, acc)
                    c_sil = running_avg(c_sil, m, sil)
                else:
                    print "Training"
                    t_acc = running_avg(t_acc, m - n_crossvalid_minibatches, acc)
                    t_sil = running_avg(t_sil, m - n_crossvalid_minibatches, sil)

                    if bprop:
                        trainer = BackpropTrainer(net, dataset=ds)
                    else:
                        trainer = RPropMinusTrainer(net, dataset=ds)
                    trainer.train()

                m += 1

                print
                print "n_max_frames, max_files, max_frames_per_segment, trim_segments, n_max_epoch, n_max_frames_per_minibatch, n_hidden_units, sigmoid, arac, n_last_frames, n_crossvalid_minibatches, bprop"
                print n_max_frames, max_files, max_frames_per_segment, trim_segments, n_max_epoch, n_max_frames_per_minibatch, n_hidden_units, sigmoid, arac, n_last_frames, n_crossvalid_minibatches, bprop
                print "Epoch: %d Mini-batch: %d" % (epoch, m)
                print
                print "Cross-validation stats"
                print "------------------------"
                print "Epoch predictive accuracy: %0.2f" % c_acc
                print "Last epoch accs:", ["%.2f" % x for x in dc_acc]
                print "Epoch sil bias: %0.2f" % c_sil
                print
                print "Training stats"
                print "------------------------"
                print "Epoch predictive accuracy: %0.2f" % t_acc
                print "Last epoch accs:", ["%.2f" % x for x in dt_acc]
                print "Epoch sil bias: %0.2f" % t_sil
                print
                print "Minibatch stats"
                print "------------------------"
                print "Mini-batch predictive accuracy: %0.2f" % acc
                print "Mini-batch sil bias: %0.2f" % sil

                ds = SupervisedDataSet(input_size, 2)

            i += 1

        dc_acc.append(c_acc)
        dt_acc.append(t_acc)
    # Map i into [-3, 3]; in Python 2 this assumes float operands, since with
    # plain ints i / n would be integer division and arg would collapse to -3.
    arg = (i / n) * 6 - 3
    x.append(arg)
    r = f(arg) + (random() - 0.5) * 0.2
    y.append(f(arg))
    y_noise.append(r)
    ds.addSample((arg), (r))

trainer_big = BackpropTrainer(net_big, ds, learningrate=0.01, lrdecay=1.0,
                              momentum=0.0, weightdecay=0.0)
# RProp-, cf. [Igel & Huesken, Neurocomputing 50, 2003].
trainer = RPropMinusTrainer(net, dataset=ds)

# trainer.trainUntilConvergence()
for i in range(100):
    trainer.train()
for i in range(10):
    trainer_big.train()

for i in range(n):
    arg = (i / n) * 6 - 3
    y_n.append(net.activate([arg]))
    y_n_big.append(net_big.activate([arg]))

fig = plt.figure()