def generalization_error(net, length, inp_len, med, numb, punct):
    """Estimate the generalization error of `net` on freshly generated data.

    Builds a new evaluation set of sentences of exactly `length` words,
    inserts one grammatical and one randomized sequence per sentence, and
    returns 1 minus the network's accuracy on that set.
    """
    # Generate evaluation sentences with the requested properties.
    tuples = get_nice_sentences_as_tuples(MIN=length, MAX=length,
                                          include_numbers=numb,
                                          include_punctuation=punct)
    dataset = SequenceClassificationDataSet(inp=inp_len, target=2)
    # Each sentence contributes a positive (grammatical) and a negative
    # (shuffled) example sequence.
    for matrix in construct_sentence_matrices(tuples, medium=med):
        insert_grammatical_sequence(dataset, matrix)
        insert_randomized_sequence(dataset, matrix)
    # testOnSequenceData returns accuracy; error is its complement.
    return 1 - testOnSequenceData(net, dataset)
def generateNoisySines(npoints, nseq, noise=0.3):
    """Construct a 2-class dataset out of noisy sines.

    Class 0 sequences are noisy samples of sin(x); class 1 sequences are
    noisy samples of the half-frequency sin(x/2). Each of the `nseq`
    iterations adds one sequence of each class, `npoints` samples long.
    """
    x = np.arange(npoints) / float(npoints) * 20.
    # Random phase offsets are drawn once, shared by all sequences.
    y1 = np.sin(x + rand(1) * 3.)
    y2 = np.sin(x / 2. + rand(1) * 3.)
    DS = SequenceClassificationDataSet(1, 1, nb_classes=2)
    for _ in xrange(nseq):
        # Per-sequence jitter: per-sample noise plus a constant offset.
        for label, base in ((0, y1), (1, y2)):
            DS.newSequence()
            buf = rand(npoints) * noise + base + (rand(1) - 0.5) * noise
            for sample in buf:
                DS.addSample([sample], [label])
    return DS
def __createDataset(data):
    """Build a PyBrain sequence-classification dataset from `data`.

    `data` maps a class target to a list of sequences, each sequence being
    an iterable of feature rows. Relies on the module-level `inputs`,
    `nClasses`, `labels` and `classes` definitions.
    """
    ds = SequenceClassificationDataSet(inputs, 1, nb_classes=nClasses,
                                       class_labels=labels.values())
    for target in classes:
        target_vec = np.asarray([target])
        for sequence in data[target]:
            # Every training sequence gets its own sequence boundary.
            ds.newSequence()
            for row in sequence:
                ds.appendLinked(tuple(row), target_vec)
    print(ds.calculateStatistics())
    print("DS entries " + str(ds.getNumSequences()))
    return ds
# NOTE(review): fragment starts mid-script — train_index/test_index, X, y and
# the X_train/X_test/y_train/y_test accumulators are defined earlier, outside
# this view. Presumably a cross-validation split loop; confirm against caller.
for x in train_index:
    X_train.append(X[x])
    y_train.append(y[x])
for x in test_index:
    X_test.append(X[x])
    y_test.append(y[x])
# X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.3)
# SequenceClassificationDataset(inp,target, nb_classes)
# inp = input dimension
# target = number of targets
# nb_classes = number of classes
trndata = SequenceClassificationDataSet(100,1, nb_classes=2)
tstdata = SequenceClassificationDataSet(100,1, nb_classes=2)
# NOTE(review): addSample without newSequence() puts all samples into a single
# sequence of the dataset — verify that is intended for this classifier.
for index in range(len(y_train)):
    trndata.addSample(X_train[index], y_train[index])
for index in range(len(y_test)):
    tstdata.addSample(X_test[index], y_test[index])
# Expand integer class targets into one-of-many (one-hot) target vectors.
trndata._convertToOneOfMany( bounds=[0.,1.] )
tstdata._convertToOneOfMany( bounds=[0.,1.] )
# Reuse previously trained parameters when a checkpoint file exists.
if exists("params.xml"):
    rnn = NetworkReader.readFrom('params.xml')
else:
    # construct LSTM network - note the missing output bias
    # NOTE(review): the else-branch body continues past the end of this chunk.
    correct += 1.0
train_accuracy2 = correct / float(len(Y_train))
print "training accuracy is ", train_accuracy2
test_out = net.activateOnDataset(test_ds)
Y_pred = convert_output(test_out)
correct = 0.0
for i in range(len(Y_test)):
    if compare_list(Y_test[i], Y_pred[i]):
        correct += 1.0
test_accuracy2 = correct / float(len(Y_test))
print "test accuracy is ", test_accuracy2
"""
# NOTE(review): the chunk above sits inside a triple-quoted string literal
# that opened before this view and closes at the `\"\"\"` — it is dead
# (commented-out) accuracy-reporting code, not executed. Live code resumes
# below. Confirm against the preceding part of the file.
x_dimension = len(X_train[0])
y_dimension = len(Y_train[0])
# NOTE(review): DS is built but not used below — trainer runs on `ds`
# (the SupervisedDataSet). Possibly a leftover; verify.
DS = SequenceClassificationDataSet(x_dimension, y_dimension, nb_classes=3)
ds = SupervisedDataSet(x_dimension, y_dimension)
for i in range(len(X_train)):
    ds.addSample(X_train[i], Y_train[i])
# construct LSTM network - note the missing output bias
rnn = buildNetwork(x_dimension, x_dimension, y_dimension,
                   hiddenclass=LSTMLayer, outclass=SoftmaxLayer,
                   outputbias=False, recurrent=True)
# define a training method
trainer = RPropMinusTrainer(rnn, dataset=ds, verbose=True)
    # NOTE(review): fragment starts mid-function — the enclosing `def` (and
    # the definitions of nseq, npoints, noise, y1, y2, DS) lie outside this
    # view; this looks like the body of a generateNoisySines-style builder.
    for _ in range(nseq):
        DS.newSequence()
        buf = rand(npoints) * noise + y1 + (rand(1) - 0.5) * noise
        for i in range(npoints):
            DS.addSample([buf[i]], [0])
        DS.newSequence()
        buf = rand(npoints) * noise + y2 + (rand(1) - 0.5) * noise
        for i in range(npoints):
            DS.addSample([buf[i]], [1])
    return DS

# NOTE(review): binds the class object itself, not an instance, and is
# immediately shadowed by nothing below — looks like a leftover; verify.
DS = SequenceClassificationDataSet

# create training and test data
# NOTE(review): SequenceClassificationDataSet takes (input dim, target dim)
# per the calls elsewhere in this file — passing raw X/Y arrays positionally
# here is almost certainly wrong; confirm intent.
trndata = SequenceClassificationDataSet(X_train, Y_train)
tstdata = SequenceClassificationDataSet(X_test, Y_test)

# construct LSTM network - note the missing output bias
# NOTE(review): `outclass=SoftmaxLayer` appears twice — a SyntaxError
# (duplicate keyword argument); one occurrence must be removed. Left
# unchanged in this documentation-only pass.
rnn = buildNetwork(trndata.indim, (), trndata.outdim,
                   hiddenclass=LSTMLayer,
                   outclass=SoftmaxLayer, outclass=SoftmaxLayer)
#buildNetwork( MultiDimensionalLSTM
#rnn.addInputModule(LinearLayer(3, name='in'))
#rnn.addModule(MDLSTMLayer(5,2, name='hidden'))
#rnn.addOutputModule(SoftmaxLayer(1, name='out'))
#