def test_step_decay(self):
    self.assert_invalid_step_values(
        algorithms.step_decay(
            initial_value=0.1,
            reduction_freq=10,
        ),
        initial_value=0.1,
        final_value=0.1 / 4,
        epochs=31,
    )
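# A minimal sketch of the schedule those test values imply, assuming the decay
# rule step = initial_value / (1 + iteration // reduction_freq). The formula is
# an assumption here, not quoted from the source; it only illustrates why
# final_value=0.1 / 4 over 31 epochs is consistent with reduction_freq=10.
def expected_step(initial_value, reduction_freq, iteration):
    # The step stays flat inside each window of `reduction_freq` updates
    # and shrinks harmonically from one window to the next.
    return initial_value / (1 + iteration // reduction_freq)

# After 30 updates (the last of 31 epochs, counted from zero):
# 0.1 / (1 + 30 // 10) == 0.1 / 4
assert expected_step(0.1, 10, 30) == 0.1 / 4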
# Library imports used below; vectorize, make_training_set and add_padding
# are project-local helpers defined elsewhere in the module.
import base64
import io

import matplotlib.pyplot as plt
import numpy as np
from sklearn.model_selection import train_test_split

from neupy import algorithms
from neupy.layers import *


def train_network(num_pages=1):
    training_set, vectorizer = vectorize(make_training_set(num_pages))

    examples = training_set[:, :-1]
    labels = training_set[:, -1:]

    new_examples = np.array([example[0] for example in examples])
    new_examples = add_padding(new_examples)

    training_examples, test_examples, training_labels, test_labels = train_test_split(
        new_examples, labels, test_size=0.4)

    # Layer sizes are derived from the padded input length.
    input_size = len(new_examples[0])
    scale = int(input_size / 10 * (2 / 3)) + 1
    fourth = int(scale / 4)
    thirds = int(scale / 3)

    # Each block merges the parallel branches that feed into it, then applies
    # Gaussian noise, batch normalization and dropout.
    concat_noisynormdrop_one = Concatenate() >> GaussianNoise(std=1) >> BatchNorm() >> Dropout(proba=.6)
    concat_noisynormdrop_two = Concatenate() >> GaussianNoise(std=1) >> BatchNorm() >> Dropout(proba=.3)
    concat_noisynormdrop_three = Concatenate() >> GaussianNoise(std=1) >> BatchNorm() >> Dropout(proba=.3)

    sub_tri = Elu(fourth) >> Sigmoid(fourth)
    sub_tri_leaky_relu = LeakyRelu(thirds) >> LeakyRelu(thirds) >> LeakyRelu(thirds)

    # Three groups of parallel branches (joined with |), each followed by a
    # concatenate/noise/norm/dropout block, ending in a single sigmoid output.
    noisy_para_seq = (
        Input(input_size)
        >> Linear(scale)
        >> (Tanh(scale) | Elu(scale) | sub_tri_leaky_relu | sub_tri)
        >> concat_noisynormdrop_one
        >> (Tanh(scale) >> Tanh(scale) | Elu(scale) >> Elu(scale) | Sigmoid(fourth) >> Sigmoid(fourth))
        >> concat_noisynormdrop_two
        >> (Tanh(scale) | Elu(scale) | LeakyRelu(scale) | Sigmoid(scale))
        >> concat_noisynormdrop_three
        >> Sigmoid(1)
    )

    optimizer = algorithms.Adam(
        noisy_para_seq,
        batch_size=64,
        shuffle_data=True,
        loss='binary_crossentropy',
        verbose=True,
        regularizer=algorithms.l2(0.001),
        step=algorithms.step_decay(
            initial_value=0.10,
            reduction_freq=10,
        )
    )

    optimizer.train(training_examples, training_labels, test_examples, test_labels, epochs=200)

    # Threshold the sigmoid output at 0.5 and measure accuracy on the test split.
    prediction = [1 if i > .5 else 0 for i in optimizer.predict(test_examples)]
    accuracy = [1 if prediction[i] == test_labels[i] else 0
                for i in range(len(prediction))].count(1) / len(prediction)
    print(f'{accuracy * 100:.2f}%')

    # Save the training error plot into an in-memory buffer and base64-encode it.
    optimizer.plot_errors(show=False)
    plot_buffer = io.BytesIO()
    plt.savefig(plot_buffer)
    plot_buffer.seek(0)
    encoded = base64.b64encode(plot_buffer.read())

    return optimizer, vectorizer, [new_examples[0]], encoded
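# A hedged usage sketch: train_network returns the fitted optimizer, the
# vectorizer, one padded example and a base64-encoded error plot. The
# 'training_errors.html' file name and num_pages value are purely illustrative.
if __name__ == '__main__':
    optimizer, vectorizer, sample, encoded_plot = train_network(num_pages=5)

    # The encoded plot can be embedded directly into an <img> data URI.
    with open('training_errors.html', 'w') as report:
        report.write(
            '<img src="data:image/png;base64,{}"/>'.format(encoded_plot.decode('ascii'))
        )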
        Convolution((3, 3, 32)) >> Relu(),
        MaxPooling((2, 2)),

        Convolution((3, 3, 64)) >> Relu(),
        Convolution((3, 3, 64)) >> Relu(),
        MaxPooling((2, 2)),

        Reshape(),
        Relu(256) >> Dropout(0.5),
        Softmax(10),
    ],
    step=algorithms.step_decay(
        initial_value=0.001,

        # Parameter controls step reduction frequency. The larger
        # the value, the slower the step parameter decreases. The step will
        # be reduced after every mini-batch update. In the training
        # data we have 500 mini-batches.
        reduction_freq=5 * 500,
    ),
    regularizer=algorithms.l2(0.01),

    loss='categorical_crossentropy',
    batch_size=100,
    shuffle_data=True,
    verbose=True,
)
network.train(x_train, y_train, x_test, y_test, epochs=30)

y_predicted = network.predict(x_test).argmax(axis=1)
y_test_labels = np.asarray(y_test.argmax(axis=1)).reshape(len(y_test))
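# A small arithmetic sketch of what reduction_freq=5 * 500 means above, assuming
# the decay rule step = initial_value / (1 + n_updates // reduction_freq) (an
# assumed formula, not quoted from the source). With 500 mini-batch updates per
# epoch, the step is reduced once every 5 epochs.
initial_value = 0.001
reduction_freq = 5 * 500
updates_per_epoch = 500

for epoch in (0, 5, 10, 30):
    n_updates = epoch * updates_per_epoch
    step = initial_value / (1 + n_updates // reduction_freq)
    print(f'epoch {epoch:>2}: step = {step:.6f}')
# epoch  0: step = 0.001000
# epoch  5: step = 0.000500
# epoch 10: step = 0.000333
# epoch 30: step = 0.000143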
    # It's suitable for classification with 3 or more classes.
    loss='categorical_crossentropy',

    # Mini-batch size. It defines how many samples will be propagated
    # through the network at once. During the training, weights will
    # be updated after every mini-batch propagation.
    # Note: When the number of training samples is not divisible by 128,
    # the last mini-batch will have fewer than 128 samples.
    batch_size=128,

    # Step == Learning rate
    # The step decay algorithm reduces the learning step
    # monotonically after each weight update.
    step=algorithms.step_decay(
        initial_value=0.05,

        # Parameter controls step reduction frequency. The higher
        # the value, the slower the step parameter decreases.
        reduction_freq=500,
    ),

    # Shows information about the algorithm and
    # training progress in the terminal
    verbose=True,

    # Randomly shuffles the training dataset before every epoch
    shuffle_data=True,
)

print("Preparing data...")
x_train, x_test, y_train, y_test = load_data()

# Training network for 4 epochs
print("Training...")
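# A quick arithmetic sketch of the batch_size note above, assuming a training
# set of 60,000 samples (e.g. MNIST; the exact dataset size is an assumption):
n_samples = 60_000
batch_size = 128

full_batches, last_batch = divmod(n_samples, batch_size)
print(full_batches, last_batch)  # 468 full mini-batches, last mini-batch has 96 samples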