def visulizeDataSet(network, data, seqno, in_labels, out_labels):
    """Plot one sequence of ``data`` together with the network's output.

    Sequence ``seqno`` is copied sample by sample into a temporary
    single-sequence ``SequentialDataSet``.  The current figure is then split
    into ``len(in_labels) + len(out_labels)`` stacked subplots: first one per
    input label (input column ``i``), then one per output label showing
    target column ``i`` (legend 'train') and module output column ``i``
    (legend 'sim').

    :param network: module evaluated via
        ``ModuleValidator.calculateModuleOutput``.
    :param data: dataset exposing ``getSequence``, ``getSequenceLength``,
        ``indim`` and ``outdim``.
    :param seqno: index of the sequence to visualise.
    :param in_labels: y-axis labels; entry ``i`` labels input column ``i``.
    :param out_labels: y-axis labels; entry ``i`` labels output column ``i``.
    """
    seq = data.getSequence(seqno)
    tmpDs = SequentialDataSet(data.indim, data.outdim)
    tmpDs.newSequence()
    # range() instead of xrange() so the loop runs on Python 2 and 3 alike.
    for i in range(data.getSequenceLength(seqno)):
        tmpDs.addSample(seq[0][i], seq[1][i])

    n_in = len(in_labels)
    n_out = len(out_labels)
    nplots = n_in + n_out

    for i, label in enumerate(in_labels):
        p = PL.subplot(nplots, 1, i + 1)
        p.clear()
        p.plot(tmpDs['input'][:, i])
        p.set_ylabel(label)

    if n_out == 0:
        # Nothing to simulate; as before, the module is not evaluated.
        return

    # The module output does not depend on the label being plotted, so it is
    # computed once here instead of once per output subplot.
    output = ModuleValidator.calculateModuleOutput(network, tmpDs)
    for i, label in enumerate(out_labels):
        p = PL.subplot(nplots, 1, i + 1 + n_in)
        p.clear()
        p.plot(tmpDs['target'][:, i], label='train')
        p.plot(output[:, i], label='sim')
        p.legend()
        p.set_ylabel(label)
def visulizeDataSet(network, data, seqno, in_labels, out_labels):
    """Plot one sequence of ``data`` together with the network's output.

    Sequence ``seqno`` is copied sample by sample into a temporary
    single-sequence ``SequentialDataSet``.  The current figure is then split
    into ``len(in_labels) + len(out_labels)`` stacked subplots: first one per
    input label (input column ``i``), then one per output label showing
    target column ``i`` (legend 'train') and module output column ``i``
    (legend 'sim').

    :param network: module evaluated via
        ``ModuleValidator.calculateModuleOutput``.
    :param data: dataset exposing ``getSequence``, ``getSequenceLength``,
        ``indim`` and ``outdim``.
    :param seqno: index of the sequence to visualise.
    :param in_labels: y-axis labels; entry ``i`` labels input column ``i``.
    :param out_labels: y-axis labels; entry ``i`` labels output column ``i``.
    """
    seq = data.getSequence(seqno)
    tmpDs = SequentialDataSet(data.indim, data.outdim)
    tmpDs.newSequence()
    # range() instead of xrange() so the loop runs on Python 2 and 3 alike.
    for i in range(data.getSequenceLength(seqno)):
        tmpDs.addSample(seq[0][i], seq[1][i])

    n_in = len(in_labels)
    n_out = len(out_labels)
    nplots = n_in + n_out

    for i, label in enumerate(in_labels):
        p = PL.subplot(nplots, 1, i + 1)
        p.clear()
        p.plot(tmpDs['input'][:, i])
        p.set_ylabel(label)

    if n_out == 0:
        # Nothing to simulate; as before, the module is not evaluated.
        return

    # The module output does not depend on the label being plotted, so it is
    # computed once here instead of once per output subplot.
    output = ModuleValidator.calculateModuleOutput(network, tmpDs)
    for i, label in enumerate(out_labels):
        p = PL.subplot(nplots, 1, i + 1 + n_in)
        p.clear()
        p.plot(tmpDs['target'][:, i], label='train')
        p.plot(output[:, i], label='sim')
        p.legend()
        p.set_ylabel(label)
def testOnSequenceData(module, dataset):
    """Score ``module`` as a sequence classifier on ``dataset``.

    Both the stored targets and the module outputs are read as one-of-many
    vectors.  Each sequence is assigned the class given by the argmax of
    ITS LAST ITEM only; earlier items in the sequence are ignored.

    :param module: module evaluated via
        ``ModuleValidator.calculateModuleOutput``.
    :param dataset: sequential dataset providing a "target" field.
    :return: the result of ``Validator.classificationPerformance`` for the
        predicted versus the target classes.
    """
    target = dataset.getField("target")
    output = ModuleValidator.calculateModuleOutput(module, dataset)

    # Index of the final sample of every sequence in the dataset.
    last_indices = SequenceHelper.getSequenceEnds(dataset)

    predicted = []
    expected = []
    for last in last_indices:
        predicted.append(argmax(output[last]))
        expected.append(argmax(target[last]))

    return Validator.classificationPerformance(array(predicted),
                                               array(expected))
def get_segregation(file_path="/computed/sentence_analysis_reg.pkl.gz",
                    error_metric=regression_metric):
    """Group per-sample prediction errors by rating and sort each group.

    Loads ``(best_module, testData, Y_test)`` from ``root + file_path``,
    evaluates the module on ``testData`` and computes
    ``error_metric(prediction, truth)`` for every sample.

    :param file_path: path, appended to the module-level ``root``, of the
        archive read with ``data.load``.
    :param error_metric: callable ``(predicted, expected) -> error``.
    :return: dict with keys 0..4; each value is a list of
        ``(error, sample_index)`` tuples sorted in ascending order.  Only the
        first half of the samples is included (see the note in the body).
    """
    # Loading necessary data.
    best_module, testData, Y_test = data.load(root + file_path)
    n_samples = len(Y_test)

    # Computing the error of every sample.
    Y_pred = ModuleValidator.calculateModuleOutput(best_module, testData)
    error = [error_metric(Y_pred[i], Y_test[i]) for i in range(n_samples)]

    sorted_err = {0: [], 1: [], 2: [], 3: [], 4: []}

    # For some reason the last n_samples/2 samples are corrupted and are not
    # aligned to the reviews, so only the first half is grouped.
    # `//` keeps the bound an integer under true division, and the tuples
    # are built directly rather than by indexing zip(), which is an
    # iterator on Python 3.
    for idx in range(n_samples // 2):
        sorted_err[Y_test[idx]].append((error[idx], idx))

    for rating in range(5):
        sorted_err[rating] = sorted(sorted_err[rating])

    return sorted_err
def main():
    """Train a recurrent network on the configured data and log progress.

    The config file ``configs/<sys.argv[1]>`` is read, a log directory named
    ``<__file__>-<sys.argv[1]>`` is created (``os.mkdir`` raises if it already
    exists) and the config is dumped there as ``config.txt``.  Training and
    validation datasets are built with ``seqDataSetPair`` and normalised with
    the scales from the config.

    The network has three sigmoid hidden layers (``hidden0``..``hidden2``)
    plus a recurrent connection from ``hidden2`` back to ``hidden0``.

    The training loop has no exit condition: it runs until the process is
    interrupted.  After every ``config['epochs_per_update']`` epochs it writes
    ``Epoch_<n>.xml`` and ``Latest.xml`` snapshots, prints the RMS and maximum
    absolute error on both datasets and, if enabled, refreshes the plots.
    """
    config = MU.ConfigReader('configs/%s' % sys.argv[1])
    config.read()

    logDir = '%s-%s' % (__file__, sys.argv[1])
    os.mkdir(logDir)

    with open('%s/config.txt' % logDir, 'w') as outfile:
        json.dump(config.getConfigDict(), outfile, indent=4)

    dr = MU.DataReader(config['input_tsv_path'])
    data = dr.read(config['interested_columns'])

    inLabels = config['input_columns']
    outLabels = config['output_columns']

    tds, vds = seqDataSetPair(data, inLabels, outLabels,
                              config['seq_label_column'],
                              config['test_seqno'],
                              config['validation_seqno'])

    inScale = config.getDataScale(inLabels)
    outScale = config.getDataScale(outLabels)

    normalizeDataSet(tds, ins=inScale, outs=outScale)
    normalizeDataSet(vds, ins=inScale, outs=outScale)

    trainData = tds
    validationData = vds

    # Floor division: a layer width must be an integer, and `/` yields a
    # float under true division (Python 3 or `from __future__ import division`).
    fdim = tds.indim // 2 + 15
    xdim = tds.outdim * 2

    rnn = buildNetwork(tds.indim, fdim, fdim, xdim, tds.outdim,
                       hiddenclass=SigmoidLayer, recurrent=True)
    rnn.addRecurrentConnection(FullConnection(rnn['hidden2'], rnn['hidden0']))
    rnn.sortModules()

    # Alternatives previously tried here (kept as a note, not as dead code):
    #   RPropMinusTrainer without weightdecay;
    #   BackpropTrainer(learningrate=0.0001, lrdecay=1.0, momentum=0.4,
    #                   batchlearning=False, weightdecay=0).
    trainer = RPropMinusTrainer(rnn, dataset=trainData, batchlearning=True,
                                verbose=True, weightdecay=0.005)

    errTime = []
    errTrain = []
    errValidation = []
    epochNo = 0
    while True:
        for _ in range(config['epochs_per_update']):
            trainer.train()

        epochNo += config['epochs_per_update']
        NetworkWriter.writeToFile(rnn, '%s/Epoch_%d.xml' % (logDir, epochNo))
        NetworkWriter.writeToFile(rnn, '%s/Latest.xml' % logDir)

        tOut = ModuleValidator.calculateModuleOutput(rnn, trainData)
        vOut = ModuleValidator.calculateModuleOutput(rnn, validationData)

        # presumably the factor that maps normalised errors back to the
        # physical unit of 'output_scalar_label' -- confirm against MU.
        tScaler = config.getDataScale([config['output_scalar_label']])[0][1]
        tAvgErr = NP.sqrt(NP.mean((trainData['target'] - tOut) ** 2)) * tScaler
        vAvgErr = NP.sqrt(NP.mean((validationData['target'] - vOut) ** 2)) * tScaler
        tMaxErr = NP.max(NP.abs(trainData['target'] - tOut)) * tScaler
        vMaxErr = NP.max(NP.abs(validationData['target'] - vOut)) * tScaler

        errTrain.append(tAvgErr)
        errValidation.append(vAvgErr)
        errTime.append(epochNo)

        # Single-argument print(...) produces the same text on Python 2 and 3.
        print("Training error: avg %5.3f degC max %5.3f degC" % (tAvgErr, tMaxErr))
        print("Validation error: avg %5.3f degC max %5.3f degC" % (vAvgErr, vMaxErr))
        print("------------------------------------------------------------------------------")

        if config['visualize_on_training'] == 'yes':
            # ioff()/ion() brackets each redraw so the figure is updated in
            # one go by the following draw().
            PL.figure(1)
            PL.ioff()
            visulizeDataSet(rnn, trainData, 0,
                            config['visualized_columns']['input'],
                            config['visualized_columns']['output'])
            PL.ion()
            PL.draw()

            PL.figure(2)
            PL.ioff()
            visulizeDataSet(rnn, validationData, 0,
                            config['visualized_columns']['input'],
                            config['visualized_columns']['output'])
            PL.ion()
            PL.draw()

            # NOTE(review): the error-curve figure is assumed to belong to
            # the visualisation branch -- confirm against the original layout.
            p = PL.figure(3)
            PL.ioff()
            p.clear()
            PL.plot(errTime, errTrain, label='Train')
            PL.plot(errTime, errValidation, label='Validation')
            PL.legend()
            PL.ion()
            PL.draw()
# NOTE(review): the two statements below look like the tail of a plotting
# helper whose header lies outside this view (they read `plotdir` and `iter`,
# neither of which is assigned here) -- confirm their enclosing scope.
plotname = os.path.join(plotdir, ('jpq2layers_plot' + str(iter)))
pylab.savefig(plotname)

# set-up the neural network
nneuron = 5
mom = 0.98
netname = "LSL-" + str(nneuron) + "-" + str(mom)
mv = ModuleValidator()
v = Validator()

# create the test DataSet: inputs from 0.0 in steps of 0.01, with targets
# 0.5 + 0.4*sin(2*pi*x), both stored as single-column arrays
x = numpy.arange(0.0, 1.0 + 0.01, 0.01)
s = 0.5 + 0.4 * numpy.sin(2 * numpy.pi * x)
tsts = SupervisedDataSet(1, 1)
tsts.setField('input', x.reshape(len(x), 1))
tsts.setField('target', s.reshape(len(s), 1))

# read the train DataSet from file
trndata = SupervisedDataSet.loadFromFile(os.path.join(os.getcwd(), 'trndata'))

# NOTE(review): everything that uses `n` is assumed to sit inside this branch,
# since `n` is only assigned when a saved network file exists -- confirm.
myneuralnet = os.path.join(os.getcwd(), 'myneuralnet.xml')
if os.path.isfile(myneuralnet):
    n = NetworkReader.readFrom(myneuralnet, name=netname)

    # calculate the test DataSet based on the trained Neural Network
    ctsts = mv.calculateModuleOutput(n, tsts)
    tserr = v.MSE(ctsts, tsts['target'])
    # Single-argument print(...) gives the same text on Python 2 and 3.
    print('MSE error on TSTS: %s' % (tserr,))
    myplot(trndata, tsts=tsts, ctsts=ctsts)
    pylab.show()
tsts.setField('target', s.reshape(len(s), 1))

# read the train DataSet from file
trndata = SupervisedDataSet.loadFromFile(os.path.join(os.getcwd(), 'trndata'))

# create the trainer
t = BackpropTrainer(n, learningrate=0.01, momentum=mom)

# train the neural network from the train DataSet
# cterrori holds the MSE of the previous round (seeded with 1.0).
cterrori = 1.0
# Single-argument print(...) gives the same text on Python 2 and 3.
print("trainer momentum:" + str(mom))
for iter in range(25):
    # 1000 is trainOnDataset's second argument (presumably the number of
    # epochs per round -- confirm against the trainer's documentation).
    t.trainOnDataset(trndata, 1000)
    ctrndata = mv.calculateModuleOutput(n, trndata)
    cterr = v.MSE(ctrndata, trndata['target'])
    # Despite its name this is the absolute change in MSE since last round.
    relerr = abs(cterr - cterrori)
    cterrori = cterr
    print('iteration: %s MSE error: %s' % (iter + 1, cterr))
    myplot(trndata, ctrndata, iter=iter + 1)
    # Stop early once the error is tiny or has stopped moving.
    if cterr < 1.e-5 or relerr < 1.e-7:
        break

# write the network using xml file: append when the file already exists,
# otherwise create it
myneuralnet = os.path.join(os.getcwd(), 'myneuralnet.xml')
if os.path.isfile(myneuralnet):
    NetworkWriter.appendToFile(n, myneuralnet)
else:
    NetworkWriter.writeToFile(n, myneuralnet)

# calculate the test DataSet based on the trained Neural Network
def main():
    """Train a recurrent network on the configured data and log progress.

    The config file ``configs/<sys.argv[1]>`` is read, a log directory named
    ``<__file__>-<sys.argv[1]>`` is created (``os.mkdir`` raises if it already
    exists) and the config is dumped there as ``config.txt``.  Training and
    validation datasets are built with ``seqDataSetPair`` and normalised with
    the scales from the config.

    The network has four sigmoid hidden layers; ``hidden0``, ``hidden1`` and
    ``hidden2`` each get a recurrent connection onto themselves.

    The training loop has no exit condition: it runs until the process is
    interrupted.  After every ``config['epochs_per_update']`` epochs it writes
    ``Epoch_<n>.xml`` and ``Latest.xml`` snapshots, prints the RMS and maximum
    absolute error on both datasets and, if enabled, refreshes the plots.
    """
    config = MU.ConfigReader('configs/%s' % sys.argv[1])
    config.read()

    logDir = '%s-%s' % (__file__, sys.argv[1])
    os.mkdir(logDir)

    with open('%s/config.txt' % logDir, 'w') as outfile:
        json.dump(config.getConfigDict(), outfile, indent=4)

    dr = MU.DataReader(config['input_tsv_path'])
    data = dr.read(config['interested_columns'])

    inLabels = config['input_columns']
    outLabels = config['output_columns']

    tds, vds = seqDataSetPair(data, inLabels, outLabels,
                              config['seq_label_column'],
                              config['test_seqno'],
                              config['validation_seqno'])

    inScale = config.getDataScale(inLabels)
    outScale = config.getDataScale(outLabels)

    normalizeDataSet(tds, ins=inScale, outs=outScale)
    normalizeDataSet(vds, ins=inScale, outs=outScale)

    trainData = tds
    validationData = vds

    # Floor division: a layer width must be an integer, and `/` yields a
    # float under true division (Python 3 or `from __future__ import division`).
    fdim = tds.indim // 5 + 5
    xdim = tds.outdim * 2

    rnn = buildNetwork(tds.indim, fdim, fdim, fdim, xdim, tds.outdim,
                       hiddenclass=SigmoidLayer, recurrent=True)
    for layer_name in ('hidden0', 'hidden1', 'hidden2'):
        rnn.addRecurrentConnection(
            FullConnection(rnn[layer_name], rnn[layer_name]))
    rnn.sortModules()

    trainer = RPropMinusTrainer(rnn, dataset=trainData, batchlearning=True,
                                verbose=True, weightdecay=0.005)

    errTime = []
    errTrain = []
    errValidation = []
    epochNo = 0
    while True:
        for _ in range(config['epochs_per_update']):
            trainer.train()

        epochNo += config['epochs_per_update']
        NetworkWriter.writeToFile(rnn, '%s/Epoch_%d.xml' % (logDir, epochNo))
        NetworkWriter.writeToFile(rnn, '%s/Latest.xml' % logDir)

        tOut = ModuleValidator.calculateModuleOutput(rnn, trainData)
        vOut = ModuleValidator.calculateModuleOutput(rnn, validationData)

        # presumably the factor that maps normalised errors back to the
        # physical unit of 'output_scalar_label' -- confirm against MU.
        tScaler = config.getDataScale([config['output_scalar_label']])[0][1]
        tAvgErr = NP.sqrt(NP.mean((trainData['target'] - tOut) ** 2)) * tScaler
        vAvgErr = NP.sqrt(NP.mean((validationData['target'] - vOut) ** 2)) * tScaler
        tMaxErr = NP.max(NP.abs(trainData['target'] - tOut)) * tScaler
        vMaxErr = NP.max(NP.abs(validationData['target'] - vOut)) * tScaler

        errTrain.append(tAvgErr)
        errValidation.append(vAvgErr)
        errTime.append(epochNo)

        # Single-argument print(...) produces the same text on Python 2 and 3.
        print("Training error: avg %5.3f max %5.3f" % (tAvgErr, tMaxErr))
        print("Validation error: avg %5.3f max %5.3f" % (vAvgErr, vMaxErr))
        print("------------------------------------------------------------------------------")

        if config['visualize_on_training'] == 'yes':
            # ioff()/ion() brackets each redraw so the figure is updated in
            # one go by the following draw().
            PL.figure(1)
            PL.ioff()
            visulizeDataSet(rnn, trainData, 0,
                            config['visualized_columns']['input'],
                            config['visualized_columns']['output'])
            PL.ion()
            PL.draw()

            PL.figure(2)
            PL.ioff()
            visulizeDataSet(rnn, validationData, 0,
                            config['visualized_columns']['input'],
                            config['visualized_columns']['output'])
            PL.ion()
            PL.draw()

            # NOTE(review): the error-curve figure is assumed to belong to
            # the visualisation branch -- confirm against the original layout.
            p = PL.figure(3)
            PL.ioff()
            p.clear()
            PL.plot(errTime, errTrain, label='Train')
            PL.plot(errTime, errValidation, label='Validation')
            PL.legend()
            PL.ion()
            PL.draw()
tsts.setField('input', x.reshape(len(x), 1))
tsts.setField('target', s.reshape(len(s), 1))

# read the train DataSet from file
trndata = SupervisedDataSet.loadFromFile(os.path.join(os.getcwd(), 'trndata'))

# create the trainer
t = BackpropTrainer(n, learningrate=0.01, momentum=mom)

# train the neural network from the train DataSet
# cterrori holds the MSE of the previous round (seeded with 1.0).
cterrori = 1.0
print("trainer momentum:" + str(mom))
for iter in range(25):
    # 1000 is trainOnDataset's second argument (presumably the number of
    # epochs per round -- confirm against the trainer's documentation).
    t.trainOnDataset(trndata, 1000)
    ctrndata = mv.calculateModuleOutput(n, trndata)
    cterr = v.MSE(ctrndata, trndata['target'])
    # Despite its name this is the absolute change in MSE since last round.
    relerr = abs(cterr - cterrori)
    cterrori = cterr
    # One pre-formatted argument: a multi-argument print(...) would print a
    # tuple on Python 2, whereas this yields the same line on 2 and 3.
    print('iteration: %s MSE error: %s' % (iter + 1, cterr))
    myplot(trndata, ctrndata, iter=iter + 1)
    # Stop early once the error is tiny or has stopped moving.
    if cterr < 1.e-5 or relerr < 1.e-7:
        break

# write the network using xml file: append when the file already exists,
# otherwise create it
myneuralnet = os.path.join(os.getcwd(), 'myneuralnet.xml')
if os.path.isfile(myneuralnet):
    NetworkWriter.appendToFile(n, myneuralnet)
else:
    NetworkWriter.writeToFile(n, myneuralnet)

# calculate the test DataSet based on the trained Neural Network
# NOTE(review): this statement looks like the tail of a plotting helper whose
# header lies outside this view (`plotname` is not assigned here) -- confirm
# its enclosing scope.
pylab.savefig(plotname)

# set-up the neural network
nneuron = 5
mom = 0.98
netname = "LSL-" + str(nneuron) + "-" + str(mom)
mv = ModuleValidator()
v = Validator()

# create the test DataSet: inputs from 0.0 in steps of 0.01, with targets
# 0.5 + 0.4*sin(2*pi*x), both stored as single-column arrays
x = numpy.arange(0.0, 1.0 + 0.01, 0.01)
s = 0.5 + 0.4 * numpy.sin(2 * numpy.pi * x)
tsts = SupervisedDataSet(1, 1)
tsts.setField('input', x.reshape(len(x), 1))
tsts.setField('target', s.reshape(len(s), 1))

# read the train DataSet from file
trndata = SupervisedDataSet.loadFromFile(os.path.join(os.getcwd(), 'trndata'))

# NOTE(review): everything that uses `n` is assumed to sit inside this branch,
# since `n` is only assigned when a saved network file exists -- confirm.
myneuralnet = os.path.join(os.getcwd(), 'myneuralnet.xml')
if os.path.isfile(myneuralnet):
    n = NetworkReader.readFrom(myneuralnet, name=netname)

    # calculate the test DataSet based on the trained Neural Network
    ctsts = mv.calculateModuleOutput(n, tsts)
    tserr = v.MSE(ctsts, tsts['target'])
    # Single-argument print(...) gives the same text on Python 2 and 3.
    print('MSE error on TSTS: %s' % (tserr,))
    myplot(trndata, tsts=tsts, ctsts=ctsts)
    pylab.show()
def main():
    """Train a recurrent network on the configured data and log progress.

    The config file ``configs/<sys.argv[1]>`` is read, a log directory named
    ``<__file__>-<sys.argv[1]>`` is created (``os.mkdir`` raises if it already
    exists) and the config is dumped there as ``config.txt``.  Training and
    validation datasets are built with ``seqDataSetPair`` and normalised with
    the scales from the config.

    The network has three sigmoid hidden layers; ``hidden0`` and ``hidden1``
    each get a recurrent connection onto themselves.

    The training loop has no exit condition: it runs until the process is
    interrupted.  After every ``config["epochs_per_update"]`` epochs it writes
    ``Epoch_<n>.xml`` and ``Latest.xml`` snapshots, prints the RMS and maximum
    absolute error on both datasets and, if enabled, refreshes the plots.
    """
    config = MU.ConfigReader("configs/%s" % sys.argv[1])
    config.read()

    logDir = "%s-%s" % (__file__, sys.argv[1])
    os.mkdir(logDir)

    with open("%s/config.txt" % logDir, "w") as outfile:
        json.dump(config.getConfigDict(), outfile, indent=4)

    dr = MU.DataReader(config["input_tsv_path"])
    data = dr.read(config["interested_columns"])

    inLabels = config["input_columns"]
    outLabels = config["output_columns"]

    tds, vds = seqDataSetPair(
        data,
        inLabels,
        outLabels,
        config["seq_label_column"],
        config["test_seqno"],
        config["validation_seqno"],
    )

    inScale = config.getDataScale(inLabels)
    outScale = config.getDataScale(outLabels)

    normalizeDataSet(tds, ins=inScale, outs=outScale)
    normalizeDataSet(vds, ins=inScale, outs=outScale)

    trainData = tds
    validationData = vds

    # Floor division: a layer width must be an integer, and `/` yields a
    # float under true division (Python 3 or `from __future__ import division`).
    fdim = tds.indim // 5 + 5
    xdim = tds.outdim * 2

    rnn = buildNetwork(tds.indim, fdim, fdim, xdim, tds.outdim,
                       hiddenclass=SigmoidLayer, recurrent=True)
    for layer_name in ("hidden0", "hidden1"):
        rnn.addRecurrentConnection(
            FullConnection(rnn[layer_name], rnn[layer_name]))
    rnn.sortModules()

    trainer = RPropMinusTrainer(rnn, dataset=trainData, batchlearning=True,
                                verbose=True)

    errTime = []
    errTrain = []
    errValidation = []
    epochNo = 0
    while True:
        for _ in range(config["epochs_per_update"]):
            trainer.train()

        epochNo += config["epochs_per_update"]
        NetworkWriter.writeToFile(rnn, "%s/Epoch_%d.xml" % (logDir, epochNo))
        NetworkWriter.writeToFile(rnn, "%s/Latest.xml" % logDir)

        tOut = ModuleValidator.calculateModuleOutput(rnn, trainData)
        vOut = ModuleValidator.calculateModuleOutput(rnn, validationData)

        # presumably the factor that maps normalised errors back to the
        # physical unit of "output_scalar_label" -- confirm against MU.
        tScaler = config.getDataScale([config["output_scalar_label"]])[0][1]
        tAvgErr = NP.sqrt(NP.mean((trainData["target"] - tOut) ** 2)) * tScaler
        vAvgErr = NP.sqrt(NP.mean((validationData["target"] - vOut) ** 2)) * tScaler
        tMaxErr = NP.max(NP.abs(trainData["target"] - tOut)) * tScaler
        vMaxErr = NP.max(NP.abs(validationData["target"] - vOut)) * tScaler

        errTrain.append(tAvgErr)
        errValidation.append(vAvgErr)
        errTime.append(epochNo)

        # Single-argument print(...) produces the same text on Python 2 and 3.
        print("Training error: avg %5.3f degC max %5.3f degC" % (tAvgErr, tMaxErr))
        print("Validation error: avg %5.3f degC max %5.3f degC" % (vAvgErr, vMaxErr))
        print("------------------------------------------------------------------------------")

        if config["visualize_on_training"] == "yes":
            # ioff()/ion() brackets each redraw so the figure is updated in
            # one go by the following draw().
            PL.figure(1)
            PL.ioff()
            visulizeDataSet(
                rnn,
                trainData,
                0,
                config["visualized_columns"]["input"],
                config["visualized_columns"]["output"],
            )
            PL.ion()
            PL.draw()

            PL.figure(2)
            PL.ioff()
            visulizeDataSet(
                rnn,
                validationData,
                0,
                config["visualized_columns"]["input"],
                config["visualized_columns"]["output"],
            )
            PL.ion()
            PL.draw()

            # NOTE(review): the error-curve figure is assumed to belong to
            # the visualisation branch -- confirm against the original layout.
            p = PL.figure(3)
            PL.ioff()
            p.clear()
            PL.plot(errTime, errTrain, label="Train")
            PL.plot(errTime, errValidation, label="Validation")
            PL.legend()
            PL.ion()
            PL.draw()