def crossValidation(self, filename): trainer = BackpropTrainer(self.net) crossValidator = CrossValidator(trainer, self.createDataSetFromFile(filename), n_folds=10) result = crossValidator.validate() print result * 100, "%"
def nnTest(tx, ty, rx, ry, iterations): print "NN start" print strftime("%a, %d %b %Y %H:%M:%S", localtime()) resultst = [] resultsr = [] positions = range(iterations) network = buildNetwork(16, 16, 1, bias=True) ds = ClassificationDataSet(16, 1, class_labels=["1", "0"]) for i in xrange(len(tx)): ds.addSample(tx[i], [ty[i]]) trainer = BackpropTrainer(network, ds, learningrate=0.05) validator = CrossValidator(trainer, ds, n_folds=10) print validator.validate() for i in positions: print trainer.train() resultst.append(sum((np.array([round(network.activate(test)) for test in tx]) - ty)**2)/float(len(ty))) resultsr.append(sum((np.array([round(network.activate(test)) for test in rx]) - ry)**2)/float(len(ry))) print i, resultst[i], resultsr[i] plt.plot(positions, resultst, 'g-', positions, resultsr, 'r-') plt.axis([0, iterations, 0, 1]) plt.ylabel("Percent Error") plt.xlabel("Network Epoch") plt.title("Neural Network Error") plt.savefig('nn.png', dpi=500) print "NN end" print strftime("%a, %d %b %Y %H:%M:%S", localtime())
def cvnntester(tx, ty, rx, ry, iterations, folds): network = buildNetwork(100, 50, 1, bias=True) ds = ClassificationDataSet(100,1, class_labels=["valley", "hill"]) for i in xrange(len(tx)): ds.addSample(tx[i], [ty[i]]) trainer = BackpropTrainer(network, ds, learningrate=0.005) cv = CrossValidator(trainer, ds, n_folds=folds, max_epochs=iterations, verbosity=True) print cv.validate() print sum((np.array([round(network.activate(test)) for test in rx]) - ry)**2)/float(len(ry))
def CrossValidation(self,n_fold=5,num_neuron=50): data_set_this=self.data_set data_set_this._convertToOneOfMany() print "Training with number of neuron :",num_neuron network_this=buildNetwork(data_set_this.indim,num_neuron,data_set_this.outdim,bias=True,hiddenclass=SigmoidLayer) trainer_this=BackpropTrainer(network_this,dataset=data_set_this,learningrate=0.001,momentum=0,verbose=True,weightdecay=0.1) CV=CrossValidator(trainer_this,data_set_this,num_neuron,n_folds=n_fold,max_epochs=3) perf_this=CV.validate() print "The performance of this network with CV is: ", perf_this
def cross_validate(self, dataset=None):
    """Run 10-fold cross-validation of self.neural_network on `dataset`,
    store the mean score on self.cross_validation_result and log it."""
    trainer = BackpropTrainer(self.neural_network, dataset=dataset,
                              momentum=0.1, verbose=True, weightdecay=0.01)
    validator = CrossValidator(trainer=trainer, dataset=dataset, n_folds=10)
    self.cross_validation_result = validator.validate()
    logger.info('cross val result: {result}'.format(
        result=self.cross_validation_result))
def cvnntester(tx, ty, rx, ry, iterations, folds): network = buildNetwork(100, 50, 1, bias=True) ds = ClassificationDataSet(100, 1, class_labels=["valley", "hill"]) for i in xrange(len(tx)): ds.addSample(tx[i], [ty[i]]) trainer = BackpropTrainer(network, ds, learningrate=0.005) cv = CrossValidator(trainer, ds, n_folds=folds, max_epochs=iterations, verbosity=True) print cv.validate() print sum((np.array([round(network.activate(test)) for test in rx]) - ry)**2) / float(len(ry))
def _validate(self, params):
    """
    The overridden validate function, that uses cross-validation in order
    to determine the params' performance value.
    """
    trainer = self._getTrainerForParams(params)
    validator = CrossValidator(trainer, self._dataset, self._n_folds,
                               **self._validator_kwargs)
    return validator.validate()
def _validate(self, params):
    """See GridSearchCostGamma.

    Cross-validates the trainer for `params`, memoizing results in
    self._performances keyed by the parameter tuple so repeated grid
    points are not re-evaluated."""
    key = tuple(params)
    cache = self._performances
    if key in cache:
        return cache[key]
    trainer = self._getTrainerForParams(params)
    result = CrossValidator(trainer, self._dataset, self._n_folds,
                            **self._validator_kwargs).validate()
    cache[key] = result
    return result
def vali(): from pybrain.tools.validation import ModuleValidator from pybrain.tools.validation import CrossValidator with open('new_data1.txt') as data_file: data = json.load(data_file) m = [d[0] for d in data] case = [min([a for a, s, d in m]), float(max([a for a, s, d in m])-min([a for a, s, d in m]))] week = [min([s for a, s, d in m]), float(max([s for a, s, d in m])-min([s for a, s, d in m]))] grid = [min([d for a, s, d in m]), float(max([d for a, s, d in m])-min([d for a, s, d in m]))] ds = SupervisedDataSet(3, 1) import random random.shuffle(data) print len(data) for i in xrange(0, len(data)): # print "Adding {}th data sample".format(i), x1 = float(data[i][0][0] - case[0])/case[1] x2 = float(data[i][0][1] - week[0])/week[1] x3 = float(data[i][0][2] - grid[0])/grid[1] input = (x1, x2, x3) output = data[i][1] ds.addSample(input, output) # print ":: Done" print "Train" net = buildNetwork(3, 3, 1, bias=True) tstdata, trndata = ds.splitWithProportion( 0.33 ) trainer = BackpropTrainer(net, trndata) mse = [] modval = ModuleValidator() for i in range(100): trainer.trainEpochs(1) trainer.trainOnDataset(dataset=trndata) cv = CrossValidator(trainer, trndata, n_folds=10, valfunc=modval.MSE) mse_val = cv.validate() print "MSE %f @ %i" % (mse_val, i) mse.append(mse_val) with open('cross_validation.json', 'w') as outfile: json.dump(mse, outfile, indent=4)
def CV_best_struct(self,n_fold=5): data_set_this = self.data_set perf=[] for num_neuron in np.arange(200,4000,500): print "Training with number of neuron :",num_neuron network_this=buildNetwork(data_set_this.indim,num_neuron,data_set_this.outdim,bias=True,hiddenclass=SigmoidLayer,outclass=SoftmaxLayer) trainer_this=BackpropTrainer(network_this,dataset=data_set_this,learningrate=0.001,momentum=0,verbose=False,weightdecay=0.1) '''here, the data set should be raw data instead of pca data''' '''do pca after data spliting ''' CV=CrossValidator(trainer_this,data_set_this,num_neuron,n_folds=n_fold,max_epochs=3) perf_this=CV.validate() perf.append(perf_this) print "The performance of this network with CV is: ", perf_this print "All performance: ", perf output=open("CV_results_200to4000.csv",'wb') filewriter=csv.writer(output) filewriter.writerow(perf)
def train(self, args):
    """Train the current network.

    args[1], when present, overrides the configured epoch count.
    Dispatches on self.net.trainingType: "gradient" and "optimization"
    delegate to self.__train; "crossval" runs 5-fold cross-validation and
    prints the mean classification performance.

    Fixes:
    - the crossval branch tested `self.trainingType` while the sibling
      branches test `self.net.trainingType`; unified on the latter (the
      error message below is updated to match).
    - `valfunc` was previously given the *result* of
      ModuleValidator.classificationPerformance (a number), not the
      function itself, so the validator could never call it; the callable
      is now passed directly.
    """
    if self.data.ds == None:
        print("Can't train without loaded data")
        return
    if args != [] and len(args) >= 2:
        self.net.epochs = int(args[1])
    if self.net.trainingType == "gradient":
        if self.trainer == None:
            self.trainer, self.returnsNet = self.__getGradientTrainer()
        self.__train(self.trainer.trainEpochs, self.returnsNet)
    elif self.net.trainingType == "optimization":
        if self.trainer == None:
            self.trainer, self.returnsNet = self.__getOptimizationTrainer()
        self.__train(self.trainer.learn, self.returnsNet)
        return
    elif self.net.trainingType == "crossval":
        if self.trainer == None:
            self.trainer, self.returnsNet = self.__getGradientTrainer()
        validator = CrossValidator(trainer=self.trainer, dataset=self.trainer.ds,
                                   n_folds=5,
                                   valfunc=ModuleValidator.classificationPerformance,
                                   verbose=True, max_epochs=1)
        print(validator.validate())
    else:
        raise Exception("Cannot create trainer, no network type specified"
                        + self.net.trainingType)
translation = {
    # NOTE(review): the head of this dict was truncated in the source;
    # the "x": 0 entry is reconstructed from the duplicate snippet that
    # defines translation = {"x": 0, "o": 1, "b": 2}.
    'x': 0,
    'o': 1,
    'b': 2,
}


def row_preprocess(row):
    """Map a row of 'x'/'o'/'b' board cells to their integer codes."""
    return [translation[x] for x in row]


if __name__ == "__main__":
    # Last CSV column is the class ('positive'/'negative'); the first nine
    # are the board cells.
    raw_data = list(csv.reader(open("tic-tac-toe.data")))
    targets = [1 if x[-1] == 'positive' else 0 for x in raw_data]
    inputs = [row_preprocess(x[:-1]) for x in raw_data]
    alldata = ClassificationDataSet(9, class_labels=['negative', 'positive'])
    for (i, t) in zip(inputs, targets):
        alldata.addSample(i, [t])
    # Hidden-layer size comes from the command line.
    network = buildNetwork(9, int(sys.argv[1]), 1,
                           hiddenclass=SigmoidLayer, outclass=LinearLayer)
    trainer = BackpropTrainer(network, weightdecay=0.001)
    validator = CrossValidator(trainer, alldata, n_folds=2,
                               valfunc=ModuleValidator.MSE)
    validator.setArgs(max_epochs=500)
    ret = validator.validate()
    print(ret)
from pybrain.tools.validation import CrossValidator, ModuleValidator

# Integer encoding for tic-tac-toe board cells.
translation = {"x": 0, "o": 1, "b": 2}


def row_preprocess(row):
    """Map a row of 'x'/'o'/'b' board cells to their integer codes."""
    return [translation[x] for x in row]


if __name__ == "__main__":
    raw_data = list(csv.reader(open("tic-tac-toe.data")))
    # Last column holds the class; the first nine hold the board cells.
    targets = [1 if x[-1] == "positive" else 0 for x in raw_data]
    inputs = [row_preprocess(x[:-1]) for x in raw_data]
    alldata = ClassificationDataSet(9, class_labels=["negative", "positive"])
    for (i, t) in zip(inputs, targets):
        alldata.addSample(i, [t])
    network = buildNetwork(9, 3, 1, hiddenclass=SigmoidLayer, outclass=LinearLayer)
    trainer = BackpropTrainer(network, verbose=True, weightdecay=0.001,
                              learningrate=0.1)
    trainer.setData(alldata)
    trainer.trainUntilConvergence(maxEpochs=6000)
    validator = CrossValidator(trainer, alldata, n_folds=10,
                               valfunc=ModuleValidator.MSE)
    ret = validator.validate()
    print(ret)
def crossValidation(self, filename): trainer = BackpropTrainer(self.net) crossValidator = CrossValidator(trainer, self.createDataSetFromFile(filename), n_folds=10) result = crossValidator.validate() print result*100, "%"
def cross_validate(self, dataset=None):
    """10-fold cross-validation of self.neural_network on `dataset`.

    The mean validation score is stored on self.cross_validation_result
    and written to the log."""
    net_trainer = BackpropTrainer(self.neural_network, dataset=dataset,
                                  momentum=0.1, verbose=True, weightdecay=0.01)
    cv = CrossValidator(trainer=net_trainer, dataset=dataset, n_folds=10)
    mean_result = cv.validate()
    self.cross_validation_result = mean_result
    logger.info('cross val result: {result}'.format(result=mean_result))
alldata.addSample([-1, -1], [0]) alldata.addSample([-1, -1], [0]) alldata.addSample([-1, -1], [0]) alldata.addSample([-1, -1], [0]) alldata.addSample([1, 1], [1]) alldata.addSample([1, 1], [1]) alldata.addSample([1, 1], [1]) alldata.addSample([1, 1], [1]) alldata.addSample([1, 1], [1]) tstdata, trndata = alldata.splitWithProportion(0.25) trndata._convertToOneOfMany() tstdata._convertToOneOfMany() # We can also examine the dataset print "Number of training patterns: ", len(trndata) print "Input and output dimensions: ", trndata.indim, trndata.outdim print "First sample (input, target, class):" print trndata['input'][0], trndata['target'][0], trndata['class'][0] fnn = buildNetwork( trndata.indim, 5, trndata.outdim, recurrent=False ) trainer = BackpropTrainer( fnn, dataset=trndata, momentum=0.1, verbose=True, weightdecay=0.01 ) # I am not sure about this, I don't think my production code is implemented like this modval = ModuleValidator() trainer.trainEpochs(20) trainer.trainOnDataset(dataset=trndata) cv = CrossValidator( trainer, trndata, n_folds=5, valfunc=modval.MSE ) print "MSE %f" %( cv.validate() )
import pylab, numpy
from pybrain.tools.shortcuts import buildNetwork
from pybrain.structure import TanhLayer
from pybrain.datasets import SupervisedDataSet
from pybrain.supervised.trainers import BackpropTrainer
from pybrain.tools.validation import CrossValidator, ModuleValidator

# Load credit data: the last column is the target, the rest are features.
results = pylab.loadtxt('credit.txt')
target = results[:, -1]
data = numpy.delete(results, -1, 1)

# 14 inputs -> 10 tanh hidden units -> 1 output.
net = buildNetwork(14, 10, 1, hiddenclass=TanhLayer)

ds = SupervisedDataSet(14, 1)
for row_idx in range(len(data)):
    ds.addSample(tuple(data[row_idx]), (target[row_idx],))

trainer = BackpropTrainer(net, ds)
evaluation = ModuleValidator()
validator = CrossValidator(trainer=trainer, dataset=trainer.ds, n_folds=5,
                           valfunc=evaluation.MSE)
print(validator.validate())
alldata.addSample([1,1],[1]) alldata.addSample([1,1],[1]) alldata.addSample([1,1],[1]) alldata.addSample([1,1],[1]) alldata.addSample([1,1],[1]) tstdata, trndata = alldata.splitWithProportion( 0.25 ) trndata._convertToOneOfMany( ) tstdata._convertToOneOfMany( ) #We can also examine the dataset print "Number of training patterns: ", len(trndata) print "Input and output dimensions: ", trndata.indim, trndata.outdim print "First sample (input, target, class):" print trndata['input'][0], trndata['target'][0], trndata['class'][0] fnn = buildNetwork( trndata.indim, 5, trndata.outdim, recurrent=False ) trainer = BackpropTrainer( fnn, dataset=trndata, momentum=0.1, verbose=True, weightdecay=0.01 ) # I am not sure about this, I don't think my production code is implemented like this modval = ModuleValidator() for i in range(1000): trainer.trainEpochs(1) trainer.trainOnDataset(dataset=trndata) cv = CrossValidator( trainer, trndata, n_folds=5, valfunc=modval.MSE ) print "MSE %f @ %i" %( cv.validate(), i ) print tstdata print ">", trainer.testOnClassData(dataset=tstdata)
def get_pybrain_data_set(data, input_cols, target_cols=1):
    """Build a SupervisedDataSet from `data`.

    The first `input_cols` columns of each row become the inputs
    (technically "all indices less than input_cols") and the following
    column becomes the target.
    """
    data_set = SupervisedDataSet(input_cols, target_cols)
    for row in data:
        data_set.addSample(tuple(row[:input_cols]), tuple([row[input_cols]]))
    return data_set


# Normalize all feature columns.
p['wti_var'] = normalize(p['wti_var'])
p['wti_skew'] = normalize(p['wti_skew'])
p['wti_curt'] = normalize(p['wti_curt'])
p['i_entr'] = normalize(p['i_entr'])

# Shuffle the rows.  Bug fix: DataFrame.reindex returns a *new* frame and
# does not modify `p` in place, so the previous code discarded the result
# and never actually shuffled anything.
p = p.reindex(np.random.permutation(p.index))

trainer = BackpropTrainer(
    buildNetwork(4, 5, 1),  # 4 input nodes, 5 hidden nodes, 1 output node
    get_pybrain_data_set(p.as_matrix(), 4),
    verbose=True)

cv = CrossValidator(trainer, trainer.ds, n_folds=5)
cv.setArgs(max_epochs=2, verbose=True)
print(cv.validate())
trndata.addSample( trndata_temp.getSample(n)[0], trndata_temp.getSample(n)[1] ) trndata._convertToOneOfMany( ) tstdata._convertToOneOfMany( ) fnn = buildNetwork( trndata.indim,120,trndata.outdim, outclass=SoftmaxLayer ) trainer = BackpropTrainer(fnn, dataset=trndata, momentum=0.1, verbose=True, weightdecay=0.00001) modval = ModuleValidator() # We define the number of iterations we want to train our model. for i in range(100): trainer.trainEpochs(1) trnresult = percentError(trainer.testOnClassData(dataset=trndata),trndata['class']) print "epoch : " , trainer.totalepochs," train error: " , trnresult # We validate our model by applying the n-folds technique and check the Mean Square Error cv = CrossValidator( trainer, trndata, n_folds=5, valfunc=modval.MSE ) print "MSE %f at loop %i"%(cv.validate(),i) # Finally we test our data on the model we built perror = percentError(trainer.testOnClassData(dataset=tstdata),tstdata['class']) print " Percent error on test data is - ",100.0 - perror # We also take a dump of the model and the top features fileObject2 = open('ANNDUMP', 'wb') pickle.dump(fnn, fileObject2) fileObject2.close() fObject = open('VOCABDUMP','wb') pickle.dump(vocab, fObject) fObject.close()
language, num = g.split("/")[-1].split("_") languages.append(Language(io.open(g, "r+"), language)) n = Network(languages) n.train() n.trainer.verbose = True n.trainer.trainUntilConvergence() def correctValFunc(output, target): assert len(output) == len(target) n_correct = 0 for idx, instance in enumerate(output): # This will find the maximum liklihood language classification = instance.argmax(axis=0) objective = target[idx].argmax(axis=0) if objective == classification: n_correct += 1 return 1 - (float(n_correct) / float(len(output))) def correct(output, target): return ModuleValidator.validate(correctValFunc, output, target) cv = CrossValidator(n.trainer, n.dataSet, valfunc=correct, n_folds=2) print cv.validate()
data_set = common.get_bc_data_for_nn() test_means = [] test_std = [] x_vals = [2, 3, 4, 5, 6, 7, 8] for x in x_vals: means = [] for i in range(20): trainer = BackpropTrainer( buildNetwork(3, x, 1), data_set, verbose=True ) print "%d %d" % (x, i) trainer.trainEpochs(3) cv = CrossValidator(trainer, trainer.ds, n_folds=5, valfunc=ModuleValidator.MSE) means.append(cv.validate()) test_means.append(np.mean(means)) test_std.append(np.std(means)) common.plot_nn_mse( title = "Breast Cancer Survival Neural Network", x_label = "Number of hidden nodes", x_vals = x_vals, y_means = np.array(test_means), y_std = np.array(test_std) )
languages = [] for g in glob.glob("./data/*.txt"): language, num = g.split("/")[-1].split("_") languages.append(Language(io.open(g, 'r+'), language)) n = Network(languages) n.train() n.trainer.verbose = True n.trainer.trainUntilConvergence() def correctValFunc(output, target): assert len(output) == len(target) n_correct = 0 for idx, instance in enumerate(output): # This will find the maximum liklihood language classification = instance.argmax(axis=0) objective = target[idx].argmax(axis=0) if objective == classification: n_correct += 1 return 1 - (float(n_correct) / float(len(output))) def correct(output, target): return ModuleValidator.validate(correctValFunc, output, target) cv = CrossValidator(n.trainer, n.dataSet, valfunc=correct, n_folds=2) print cv.validate()
#将类别转化为5位 dsTrain_test._convertToOneOfMany(bounds=[0, 1]) dsTest_test._convertToOneOfMany(bounds=[0, 1]) #print dsTrain_test['target'] #划分训练集跟测试集 dsTrain,dsTest = dsBuild(data) #训练神经网络 netModel = netBuild(dsTrain_test) modval = ModuleValidator() netModel.trainEpochs(20) netModel.trainUntilConvergence(maxEpochs=1000) cv = CrossValidator(netModel, dsTrain_test, n_folds=5, valfunc=modval.MSE ) print "MSE %f" %( cv.validate() ) from sklearn.externals import joblib joblib.dump(netModel, "train_model.m") netModel =joblib.load("train_model.m") #f1值检验 pred=[] really =[] yuanma = [] calma = []
def compare_l2_regularization():
    """Compare two L2 weight-decay settings (0.0001 vs 0.001) for a
    30-16-1 network on the breast-cancer data: cross-validate each, then
    record train/test error curves and save a plot to
    nn_breast_cancer_weight_decay.png.

    Fixes: the last legend label was missing its closing parenthesis
    ("Test Accuracy (0.001"); the unused `start_time` local was removed.
    """
    train_features, train_labels, test_features, test_labels = get_breast_cancer_data(
    )
    optimal_num_layers = 6
    num_neurons = [optimal_num_layers * [16]]
    train_accuracy1 = []
    test_accuracy1 = []
    train_accuracy2 = []
    test_accuracy2 = []
    iterations = range(250)
    nn1 = buildNetwork(30, 16, 1, bias=True)
    nn2 = buildNetwork(30, 16, 1, bias=True)
    dataset = ClassificationDataSet(len(train_features[0]), len(train_labels[0]),
                                    class_labels=["1", "2"])
    for instance in range(len(train_features)):
        dataset.addSample(train_features[instance], train_labels[instance])
    trainer1 = BackpropTrainer(nn1, dataset, weightdecay=0.0001)
    validator1 = CrossValidator(trainer1, dataset)
    print(validator1.validate())
    trainer2 = BackpropTrainer(nn2, dataset, weightdecay=0.001)
    validator2 = CrossValidator(trainer2, dataset)
    print(validator2.validate())
    # NOTE(review): no training call occurs inside this loop, so every
    # iteration records the same post-validation error — confirm intent.
    for iteration in iterations:
        train_accuracy1.append(
            sum((np.array(
                [np.round(nn1.activate(test)) for test in train_features]) -
                 train_labels)**2) / float(len(train_labels)))
        test_accuracy1.append(
            sum((np.array(
                [np.round(nn1.activate(test)) for test in test_features]) -
                 test_labels)**2) / float(len(test_labels)))
        train_accuracy2.append(
            sum((np.array(
                [np.round(nn2.activate(test)) for test in train_features]) -
                 train_labels)**2) / float(len(train_labels)))
        test_accuracy2.append(
            sum((np.array(
                [np.round(nn2.activate(test)) for test in test_features]) -
                 test_labels)**2) / float(len(test_labels)))
    plt.plot(iterations, train_accuracy1)
    plt.plot(iterations, test_accuracy1)
    plt.plot(iterations, train_accuracy2)
    plt.plot(iterations, test_accuracy2)
    plt.legend([
        "Train Accuracy (0.0001)", "Test Accuracy (0.0001)",
        "Train Accuracy (0.001)", "Test Accuracy (0.001)"
    ])
    plt.xlabel("Num Epoch")
    plt.ylabel("Percent Error")
    plt.title("Neural Network on Breast Cancer Data with " +
              str(num_neurons) + " layers")
    plt.savefig("nn_breast_cancer_weight_decay.png")