def train_model(model, train, test, nb_classes):
    X_train = train[0].reshape((train[0].shape[0], ) + input_shape)
    X_test = test[0].reshape((test[0].shape[0], ) + input_shape)
    X_train = X_train.astype('float32')
    X_test = X_test.astype('float32')
    X_train /= 255
    X_test /= 255
    print('X_train shape:', X_train.shape)
    print(X_train.shape[0], 'train samples')
    print(X_test.shape[0], 'test samples')

    # convert class vectors to binary class matrices
    Y_train = np_utils.to_categorical(train[1], nb_classes)
    Y_test = np_utils.to_categorical(test[1], nb_classes)

    model = make_model(model,
                       loss='categorical_crossentropy',
                       optimizer='adadelta',
                       metrics=['accuracy'])

    t = now()
    history = model.fit(X_train, Y_train,
                        batch_size=batch_size,
                        nb_epoch=nb_epoch,
                        verbose=1,
                        validation_data=(X_test, Y_test))
    print('Training time: %s' % (now() - t))
    score = model.evaluate(X_test, Y_test, verbose=0)
    print('Test score:', score[0])
    print('Test accuracy:', score[1])
    return history
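# `make_model` is not defined in this section. A minimal sketch of what the
# call sites imply -- compile the model and return it -- is given below.
# Defaulting to an accuracy metric is an inference from snippets that read
# history.history['acc'] without passing metrics; treat the whole helper as
# an assumption, not the script's actual code.
def make_model(model, loss=None, optimizer=None, metrics=None):
    # Keras compiles in place, so returning the model just lets callers
    # write `model = make_model(model, ...)`.
    model.compile(loss=loss,
                  optimizer=optimizer,
                  metrics=metrics if metrics is not None else ['accuracy'])
    return model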
def make_teacher_model(train_data, validation_data, nb_epoch=3):
    '''Train a simple CNN as teacher model.'''
    model = Sequential()
    model.add(Conv2D(64, 3, 3,
                     input_shape=input_shape,
                     border_mode='same',
                     name='conv1'))
    model.add(MaxPooling2D(name='pool1'))
    model.add(Conv2D(64, 3, 3, border_mode='same', name='conv2'))
    model.add(MaxPooling2D(name='pool2'))
    model.add(Flatten(name='flatten'))
    model.add(Dense(64, activation='relu', name='fc1'))
    model.add(Dense(nb_class, activation='softmax', name='fc2'))
    model = make_model(model,
                       loss='categorical_crossentropy',
                       optimizer=SGD(lr=0.01, momentum=0.9),
                       metrics=['accuracy'])

    train_x, train_y = train_data
    history = model.fit(train_x, train_y,
                        nb_epoch=nb_epoch,
                        validation_data=validation_data)
    return model, history
def make_wider_student_model(teacher_model, train_data, validation_data,
                             init, nb_epoch=3):
    '''Train a wider student model based on teacher_model,
       with either 'random-pad' (baseline) or 'net2wider'
    '''
    new_conv1_width = 128
    new_fc1_width = 128

    model = Sequential()
    # a wider conv1 compared to teacher_model
    model.add(Conv2D(new_conv1_width, 3, 3,
                     input_shape=input_shape,
                     border_mode='same',
                     name='conv1'))
    model.add(MaxPooling2D(name='pool1'))
    model.add(Conv2D(64, 3, 3, border_mode='same', name='conv2'))
    model.add(MaxPooling2D(name='pool2'))
    model.add(Flatten(name='flatten'))
    # a wider fc1 compared to teacher model
    model.add(Dense(new_fc1_width, activation='relu', name='fc1'))
    model.add(Dense(nb_class, activation='softmax', name='fc2'))

    # The weights for other layers need to be copied from teacher_model
    # to student_model, except for widened layers
    # and their immediate downstreams, which will be initialized separately.
    # For this example there are no other layers that need to be copied.
    w_conv1, b_conv1 = teacher_model.get_layer('conv1').get_weights()
    w_conv2, b_conv2 = teacher_model.get_layer('conv2').get_weights()
    new_w_conv1, new_b_conv1, new_w_conv2 = wider2net_conv2d(
        w_conv1, b_conv1, w_conv2, new_conv1_width, init)
    model.get_layer('conv1').set_weights([new_w_conv1, new_b_conv1])
    model.get_layer('conv2').set_weights([new_w_conv2, b_conv2])

    w_fc1, b_fc1 = teacher_model.get_layer('fc1').get_weights()
    w_fc2, b_fc2 = teacher_model.get_layer('fc2').get_weights()
    new_w_fc1, new_b_fc1, new_w_fc2 = wider2net_fc(
        w_fc1, b_fc1, w_fc2, new_fc1_width, init)
    model.get_layer('fc1').set_weights([new_w_fc1, new_b_fc1])
    model.get_layer('fc2').set_weights([new_w_fc2, b_fc2])

    model = make_model(model,
                       loss='categorical_crossentropy',
                       optimizer=SGD(lr=0.001, momentum=0.9),
                       metrics=['accuracy'])

    train_x, train_y = train_data
    history = model.fit(train_x, train_y,
                        nb_epoch=nb_epoch,
                        validation_data=validation_data)
    return model, history
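# `wider2net_conv2d` and `wider2net_fc` are defined elsewhere in the script.
# A sketch of the fully connected case, following the Net2WiderNet scheme
# (Chen et al., "Net2Net"): new units replicate randomly chosen existing
# units, and outgoing weights are divided by the replication count so the
# network's function is preserved. Treat this as an illustration, not the
# script's actual helper.
import numpy as np


def wider2net_fc(teacher_w1, teacher_b1, teacher_w2, new_width, init):
    '''teacher_w1: (n_in, width), teacher_b1: (width,),
    teacher_w2: (width, n_out); returns weights for a layer of new_width.
    '''
    n = new_width - teacher_w1.shape[1]
    if init == 'random-pad':
        # baseline: pad with small random weights
        new_w1 = np.random.normal(0, 0.1, size=(teacher_w1.shape[0], n))
        new_b1 = np.ones(n) * 0.1
        new_w2 = np.random.normal(0, 0.1, size=(n, teacher_w2.shape[1]))
        student_w2 = np.concatenate((teacher_w2, new_w2), axis=0)
    elif init == 'net2wider':
        # copy randomly chosen units; divide their outgoing weights by the
        # number of copies so the downstream pre-activations are unchanged
        index = np.random.randint(teacher_w1.shape[1], size=n)
        factors = np.bincount(index)[index] + 1.
        new_w1 = teacher_w1[:, index]
        new_b1 = teacher_b1[index]
        new_w2 = teacher_w2[index, :] / factors[:, np.newaxis]
        student_w2 = np.concatenate((teacher_w2, new_w2), axis=0)
        # the originals of the copied units must be rescaled as well
        student_w2[index, :] = new_w2
    student_w1 = np.concatenate((teacher_w1, new_w1), axis=1)
    student_b1 = np.concatenate((teacher_b1, new_b1))
    return student_w1, student_b1, student_w2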
def make_mod(dense_layer_sizes, nb_filters, nb_conv, nb_pool):
    '''Creates model comprised of 2 convolutional layers followed by
    dense layers

    dense_layer_sizes: List of layer sizes.
        This list has one number for each layer
    nb_filters: Number of convolutional filters in each convolutional layer
    nb_conv: Convolutional kernel size
    nb_pool: Size of pooling area for max pooling
    '''
    model = Sequential()
    model.add(Convolution2D(nb_filters, nb_conv, nb_conv,
                            border_mode='valid',
                            input_shape=input_shape))
    model.add(Activation('relu'))
    model.add(Convolution2D(nb_filters, nb_conv, nb_conv))
    model.add(Activation('relu'))
    model.add(MaxPooling2D(pool_size=(nb_pool, nb_pool)))
    model.add(Dropout(0.25))
    model.add(Flatten())

    for layer_size in dense_layer_sizes:
        model.add(Dense(layer_size))
        model.add(Activation('relu'))

    model.add(Dropout(0.5))
    model.add(Dense(nb_classes))
    model.add(Activation('softmax'))

    model = make_model(model,
                       loss='categorical_crossentropy',
                       optimizer='adadelta',
                       metrics=['accuracy'])
    return model
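# `make_mod` matches the build_fn interface of Keras' scikit-learn wrapper.
# A sketch of a plausible driver, in the spirit of the era's
# mnist_sklearn_wrapper example; the candidate grids and the X_train/y_train
# names are assumptions:
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.grid_search import GridSearchCV

dense_size_candidates = [[32], [64], [32, 32], [64, 64]]
my_classifier = KerasClassifier(make_mod, batch_size=32)
validator = GridSearchCV(my_classifier,
                         param_grid={'dense_layer_sizes': dense_size_candidates,
                                     'nb_epoch': [3, 6],
                                     'nb_filters': [8],
                                     'nb_conv': [3],
                                     'nb_pool': [2]},
                         scoring='log_loss',
                         n_jobs=1)
validator.fit(X_train, y_train)
print('The parameters of the best model are:')
print(validator.best_params_)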
# Result dictionary
global ret_dict
ret_dict = dict()


def vae_loss(x, x_decoded_mean):
    xent_loss = original_dim * objectives.binary_crossentropy(
        x, x_decoded_mean)
    kl_loss = -0.5 * K.sum(
        1 + z_log_var - K.square(z_mean) - K.exp(z_log_var), axis=-1)
    return xent_loss + kl_loss


vae = Model(x, x_decoded_mean)
vae = make_model(vae, optimizer='rmsprop', loss=vae_loss)

# train the VAE on MNIST digits
(x_train, y_train), (x_test, y_test) = mnist.load_data()

x_train = x_train.astype('float32') / 255.
x_test = x_test.astype('float32') / 255.
x_train = x_train.reshape((len(x_train), np.prod(x_train.shape[1:])))
x_test = x_test.reshape((len(x_test), np.prod(x_test.shape[1:])))


def train_func():
    history = vae.fit(x_train, x_train,
                      shuffle=True,
                      nb_epoch=nb_epoch,
                      # the original snippet is truncated here; the remaining
                      # arguments follow the standard Keras VAE example
                      batch_size=batch_size,
                      validation_data=(x_test, x_test))
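# The tensors x, z_mean, z_log_var and x_decoded_mean used by vae_loss are
# built earlier in the script. A sketch of the standard VAE wiring they
# imply (layer sizes are assumptions):
from keras import backend as K
from keras.layers import Input, Dense, Lambda

original_dim, intermediate_dim, latent_dim = 784, 256, 2

x = Input(batch_shape=(batch_size, original_dim))
h = Dense(intermediate_dim, activation='relu')(x)
z_mean = Dense(latent_dim)(h)
z_log_var = Dense(latent_dim)(h)


def sampling(args):
    # reparameterization trick: z = mean + sigma * epsilon
    z_mean, z_log_var = args
    epsilon = K.random_normal(shape=(batch_size, latent_dim), mean=0.)
    return z_mean + K.exp(z_log_var / 2) * epsilon


z = Lambda(sampling, output_shape=(latent_dim, ))([z_mean, z_log_var])
decoder_h = Dense(intermediate_dim, activation='relu')
decoder_mean = Dense(original_dim, activation='sigmoid')
x_decoded_mean = decoder_mean(decoder_h(z))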
X = np.zeros((len(sentences), maxlen, len(chars)), dtype=np.bool)
y = np.zeros((len(sentences), len(chars)), dtype=np.bool)
for i, sentence in enumerate(sentences):
    for t, char in enumerate(sentence):
        X[i, t, char_indices[char]] = 1
    y[i, char_indices[next_chars[i]]] = 1

# build the model: a single LSTM
print('Build model...')
model = Sequential()
model.add(LSTM(128, input_shape=(maxlen, len(chars))))
model.add(Dense(len(chars)))
model.add(Activation('softmax'))

optimizer = RMSprop(lr=0.01)
model = make_model(model, loss='categorical_crossentropy', optimizer=optimizer)


def sample(preds, temperature=1.0):
    # helper function to sample an index from a probability array
    preds = np.asarray(preds).astype('float64')
    preds = np.log(preds) / temperature
    exp_preds = np.exp(preds)
    preds = exp_preds / np.sum(exp_preds)
    probas = np.random.multinomial(1, preds, 1)
    return np.argmax(probas)


# train the model, output generated text after each iteration
def train_func():
    for iteration in range(1, 60):
        # the loop body is truncated in the original snippet; at minimum it
        # fits one epoch per iteration, as in the standard Keras example
        model.fit(X, y, batch_size=128, nb_epoch=1)
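# A sketch of how `sample` is then used to generate text from a seed string
# of length `maxlen`. `indices_char` (the inverse of `char_indices`) and the
# seed handling are assumptions based on the standard example:
def generate_text(seed_sentence, length=400, temperature=0.5):
    generated = seed_sentence
    sentence = seed_sentence
    for _ in range(length):
        x = np.zeros((1, maxlen, len(chars)))
        for t, char in enumerate(sentence):
            x[0, t, char_indices[char]] = 1.
        preds = model.predict(x, verbose=0)[0]
        next_char = indices_char[sample(preds, temperature)]
        generated += next_char
        # slide the window one character forward
        sentence = sentence[1:] + next_char
    return generated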
Y_test = np_utils.to_categorical(y_test, nb_classes)

model = Sequential()
model.add(Dense(512, input_shape=(784, )))
model.add(Activation('relu'))
model.add(Dropout(0.2))
model.add(Dense(512))
model.add(Activation('relu'))
model.add(Dropout(0.2))
model.add(Dense(10))
model.add(Activation('softmax'))

model.summary()

model = make_model(model,
                   loss='categorical_crossentropy',
                   optimizer=SGD(),
                   metrics=['accuracy'])


def train_func():
    history = model.fit(X_train, Y_train,
                        batch_size=batch_size,
                        nb_epoch=nb_epoch,
                        verbose=1,
                        validation_data=(X_test, Y_test))
    ret_dict["training_accuracy"] = history.history['acc'][-1]
    ret_dict["test_accuracy"] = history.history['val_acc'][-1]


ret = profile(train_func)
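# `profile` is not defined in this section; the call sites imply it runs a
# function and returns (wall-clock time, peak memory). A minimal sketch using
# memory_profiler -- an assumption about the real helper, not its actual code:
import time
from memory_profiler import memory_usage


def profile(func):
    start = time.time()
    # peak resident memory in MB while func runs
    # (some memory_profiler versions return a one-element list here)
    peak_mem = memory_usage((func, (), {}), max_usage=True)
    elapsed = time.time() - start
    return elapsed, peak_mem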
wheres[i] = merge([y_prepool, y],
                  mode=getwhere,
                  output_shape=lambda x: x[0])

# Now build the decoder, and use the stored "where" masks to place the features
for i in range(nlayers):
    ind = nlayers - 1 - i
    y = UpSampling2D(size=(pool_sizes[ind], pool_sizes[ind]))(y)
    y = merge([y, wheres[ind]], mode='mul')
    y = convresblock(y, nfeats=nfeats_all[ind], ksize=ksize)

# Use hard_sigmoid to clip the range of the reconstruction
y = Activation('hard_sigmoid')(y)

# Define the model and its mean squared error loss, and compile it with Adam
model = Model(img_input, y)
model = make_model(model, loss='mse', optimizer='adam')


# Fit the model
def train_func():
    history = model.fit(X_train, X_train,
                        validation_data=(X_test, X_test),
                        batch_size=batch_size,
                        nb_epoch=nb_epoch)
    ret_dict["training_accuracy"] = history.history['acc'][-1]
    ret_dict["test_accuracy"] = history.history['val_acc'][-1]


ret = profile(train_func)
ret_dict["training_time"] = str(ret[0]) + ' sec'
ret_dict["max_memory"] = str(ret[1]) + ' MB'

# Plot
X_recon = model.predict(X_test[:25])
X_plot = np.concatenate((X_test[:25], X_recon), axis=1)
input_a = Input(shape=(input_dim, ))
input_b = Input(shape=(input_dim, ))

# because we re-use the same instance `base_network`,
# the weights of the network
# will be shared across the two branches
processed_a = base_network(input_a)
processed_b = base_network(input_b)

distance = Lambda(euclidean_distance,
                  output_shape=eucl_dist_output_shape)(
                      [processed_a, processed_b])

model = Model(input=[input_a, input_b], output=distance)

# train
rms = RMSprop()
model = make_model(model, loss=contrastive_loss, optimizer=rms)


def train_func():
    history = model.fit([tr_pairs[:, 0], tr_pairs[:, 1]], tr_y,
                        validation_data=([te_pairs[:, 0], te_pairs[:, 1]],
                                         te_y),
                        batch_size=128,
                        nb_epoch=nb_epoch)
    ret_dict["training_accuracy"] = history.history['acc'][-1]
    ret_dict["test_accuracy"] = history.history['val_acc'][-1]


ret = profile(train_func)
ret_dict["training_time"] = str(ret[0]) + ' sec'
ret_dict["max_memory"] = str(ret[1]) + ' MB'

# compute final accuracy on training and test sets
pred = model.predict([tr_pairs[:, 0], tr_pairs[:, 1]])
tr_acc = compute_accuracy(pred, tr_y)
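# `euclidean_distance`, `eucl_dist_output_shape` and `contrastive_loss` are
# defined earlier in the script; sketches matching the standard Keras siamese
# example (the margin value is an assumption):
from keras import backend as K


def euclidean_distance(vects):
    x, y = vects
    return K.sqrt(K.sum(K.square(x - y), axis=1, keepdims=True))


def eucl_dist_output_shape(shapes):
    shape1, shape2 = shapes
    return (shape1[0], 1)


def contrastive_loss(y_true, y_pred):
    # Hadsell et al. 2006: pull genuine pairs together, push impostor pairs
    # at least `margin` apart
    margin = 1
    return K.mean(y_true * K.square(y_pred) +
                  (1 - y_true) * K.square(K.maximum(margin - y_pred, 0)))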
def make_deeper_student_model(teacher_model, train_data, validation_data,
                              init, nb_epoch=3):
    '''Train a deeper student model based on teacher_model,
       with either 'random-init' (baseline) or 'net2deeper'
    '''
    model = Sequential()
    model.add(Conv2D(64, 3, 3,
                     input_shape=input_shape,
                     border_mode='same',
                     name='conv1'))
    model.add(MaxPooling2D(name='pool1'))
    model.add(Conv2D(64, 3, 3, border_mode='same', name='conv2'))

    # add another conv2d layer to make original conv2 deeper
    if init == 'net2deeper':
        prev_w, _ = model.get_layer('conv2').get_weights()
        new_weights = deeper2net_conv2d(prev_w)
        model.add(Conv2D(64, 3, 3,
                         border_mode='same',
                         name='conv2-deeper',
                         weights=new_weights))
    elif init == 'random-init':
        model.add(Conv2D(64, 3, 3, border_mode='same', name='conv2-deeper'))
    else:
        raise ValueError('Unsupported weight initializer: %s' % init)
    model.add(MaxPooling2D(name='pool2'))
    model.add(Flatten(name='flatten'))
    model.add(Dense(64, activation='relu', name='fc1'))

    # add another fc layer to make original fc1 deeper
    if init == 'net2deeper':
        # net2deeper for an fc layer with relu is just an identity initializer
        model.add(Dense(64, init='identity', activation='relu',
                        name='fc1-deeper'))
    elif init == 'random-init':
        model.add(Dense(64, activation='relu', name='fc1-deeper'))
    else:
        raise ValueError('Unsupported weight initializer: %s' % init)
    model.add(Dense(nb_class, activation='softmax', name='fc2'))

    # copy weights for other layers
    copy_weights(teacher_model, model,
                 layer_names=['conv1', 'conv2', 'fc1', 'fc2'])

    model = make_model(model,
                       loss='categorical_crossentropy',
                       optimizer=SGD(lr=0.001, momentum=0.9),
                       metrics=['accuracy'])

    train_x, train_y = train_data
    history = model.fit(train_x, train_y,
                        nb_epoch=nb_epoch,
                        validation_data=validation_data)
    return model, history
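# `deeper2net_conv2d` and `copy_weights` are defined elsewhere in the script.
# Sketches of both, following the Net2DeeperNet idea of inserting an
# identity-initialized layer; the Theano-style kernel layout
# (nb_filter, nb_channel, kh, kw) is an assumption:
import numpy as np


def deeper2net_conv2d(teacher_w):
    nb_filter, nb_channel, kh, kw = teacher_w.shape
    # delta kernel: each output filter passes one input channel through
    # unchanged, so conv + relu stays an identity on non-negative inputs
    student_w = np.zeros((nb_filter, nb_filter, kh, kw))
    for i in range(nb_filter):
        student_w[i, i, (kh - 1) // 2, (kw - 1) // 2] = 1.
    student_b = np.zeros(nb_filter)
    return [student_w, student_b]


def copy_weights(teacher_model, student_model, layer_names):
    # transfer weights layer by layer for identically shaped layers
    for name in layer_names:
        weights = teacher_model.get_layer(name).get_weights()
        student_model.get_layer(name).set_weights(weights)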
for i in range(len(cos) - lahead):
    expected_output[i, 0] = np.mean(cos[i + 1:i + lahead + 1])

print('Output shape')
print(expected_output.shape)

print('Creating Model')
model = Sequential()
model.add(LSTM(50,
               batch_input_shape=(batch_size, tsteps, 1),
               return_sequences=True,
               stateful=True))
model.add(LSTM(50, return_sequences=False, stateful=True))
model.add(Dense(1))
model = make_model(model, loss='mse', optimizer='rmsprop')

print('Training')


def train_func():
    for i in range(epochs):
        print('Epoch', i, '/', epochs)
        history = model.fit(cos, expected_output,
                            batch_size=batch_size,
                            verbose=1,
                            nb_epoch=1,
                            shuffle=False)
    ret_dict["training_accuracy"] = history.history['acc'][-1]
    # fit() is called without validation data here, so 'val_acc' is never
    # populated; guard the lookup to avoid a KeyError
    if 'val_acc' in history.history:
        ret_dict["test_accuracy"] = history.history['val_acc'][-1]
                   border_mode='same', return_sequences=True))
seq.add(BatchNormalization())

seq.add(ConvLSTM2D(nb_filter=40, nb_row=3, nb_col=3,
                   border_mode='same', return_sequences=True))
seq.add(BatchNormalization())

seq.add(ConvLSTM2D(nb_filter=40, nb_row=3, nb_col=3,
                   border_mode='same', return_sequences=True))
seq.add(BatchNormalization())

seq.add(Convolution3D(nb_filter=1, kernel_dim1=1, kernel_dim2=3,
                      kernel_dim3=3, activation='sigmoid',
                      border_mode='same', dim_ordering='tf'))

seq = make_model(seq, loss='binary_crossentropy', optimizer='adadelta')


# Artificial data generation:
# Generate movies with 3 to 7 moving squares inside.
# The squares are of shape 1x1 or 2x2 pixels,
# which move linearly over time.
# For convenience we first create movies with bigger width and height (80x80)
# and at the end we select a 40x40 window.
def generate_movies(n_samples=1200, n_frames=15):
    row = 80
    col = 80
    noisy_movies = np.zeros((n_samples, n_frames, row, col, 1),
                            dtype=np.float)
    shifted_movies = np.zeros((n_samples, n_frames, row, col, 1),
                              dtype=np.float)
print('Training model.')

# train a 1D convnet with global maxpooling
sequence_input = Input(shape=(MAX_SEQUENCE_LENGTH, ), dtype='int32')
embedded_sequences = embedding_layer(sequence_input)
x = Conv1D(128, 5, activation='relu')(embedded_sequences)
x = MaxPooling1D(5)(x)
x = Conv1D(128, 5, activation='relu')(x)
x = MaxPooling1D(5)(x)
x = Conv1D(128, 5, activation='relu')(x)
x = MaxPooling1D(35)(x)
x = Flatten()(x)
x = Dense(128, activation='relu')(x)
preds = Dense(len(labels_index), activation='softmax')(x)

model = Model(sequence_input, preds)
model = make_model(model,
                   loss='categorical_crossentropy',
                   optimizer='rmsprop',
                   metrics=['acc'])


def train_func():
    # happy learning!
    history = model.fit(x_train, y_train,
                        validation_data=(x_val, y_val),
                        nb_epoch=2, batch_size=128)
    ret_dict["training_accuracy"] = history.history['acc'][-1]
    ret_dict["test_accuracy"] = history.history['val_acc'][-1]


ret = profile(train_func)
ret_dict["training_time"] = str(ret[0]) + ' sec'
ret_dict["max_memory"] = str(ret[1]) + ' MB'
# convert class vectors to binary class matrices
Y_train = np_utils.to_categorical(y_train, nb_classes)
Y_test = np_utils.to_categorical(y_test, nb_classes)

model = Sequential()
model.add(Dense(512, input_shape=(784, )))
model.add(Activation('relu'))
model.add(Dropout(0.2))
model.add(Dense(512))
model.add(Activation('relu'))
model.add(Dropout(0.2))
model.add(Dense(10))
model.add(Activation('softmax'))

model.summary()

make_model(model,
           loss='categorical_crossentropy',
           optimizer=SGD(),
           metrics=['accuracy'])


def train_model():
    history = model.fit(X_train, Y_train,
                        batch_size=batch_size,
                        nb_epoch=nb_epoch,
                        verbose=1,
                        validation_data=(X_test, Y_test))
    profile_output['TRAIN_ACCURACY'] = history.history['acc'][-1]


def test_run():
    # call training and profile memory usage
    profile_output["MODEL"] = "MNIST MLP"
    run_time, memory_usage = profile(train_model)
    profile_output['TRAINING_TIME'] = float(run_time)
    profile_output['MEM_CONSUMPTION'] = float(memory_usage)
# concatenate the match vector with the question vector,
# and do logistic regression on top
answer = Sequential()
answer.add(Merge([response, question_encoder],
                 mode='concat',
                 concat_axis=-1))
# the original paper uses a matrix multiplication for this reduction step.
# we choose to use an RNN instead.
answer.add(LSTM(32))
# one regularization layer -- more would probably be needed.
answer.add(Dropout(0.3))
answer.add(Dense(vocab_size))
# we output a probability distribution over the vocabulary
answer.add(Activation('softmax'))

answer = make_model(answer,
                    optimizer='rmsprop',
                    loss='categorical_crossentropy',
                    metrics=['accuracy'])

# Note: you could use a Graph model to avoid repeating the input twice


def train_func():
    history = answer.fit(
        [inputs_train, queries_train, inputs_train], answers_train,
        batch_size=32,
        nb_epoch=120,
        validation_data=([inputs_test, queries_test, inputs_test],
                         answers_test))
    ret_dict["training_accuracy"] = history.history['acc'][-1]
    ret_dict["test_accuracy"] = history.history['val_acc'][-1]
X_train = sequence.pad_sequences(X_train, maxlen=maxlen)
X_test = sequence.pad_sequences(X_test, maxlen=maxlen)
print('X_train shape:', X_train.shape)
print('X_test shape:', X_test.shape)

print('Build model...')
model = Sequential()
model.add(Embedding(max_features, 128, dropout=0.2))
# try using a GRU instead, for fun
model.add(LSTM(128, dropout_W=0.2, dropout_U=0.2))
model.add(Dense(1))
model.add(Activation('sigmoid'))

# try using different optimizers and different optimizer configs
model = make_model(model,
                   loss='binary_crossentropy',
                   optimizer='adam',
                   metrics=['accuracy'])

print('Train...')


def train_func():
    model.fit(X_train, y_train,
              batch_size=batch_size,
              nb_epoch=15,
              validation_data=(X_test, y_test))


ret = profile(train_func)
sentrnn.add(Dropout(0.3))

qrnn = Sequential()
qrnn.add(Embedding(vocab_size, EMBED_HIDDEN_SIZE,
                   input_length=query_maxlen))
qrnn.add(Dropout(0.3))
qrnn.add(RNN(EMBED_HIDDEN_SIZE, return_sequences=False))
qrnn.add(RepeatVector(story_maxlen))

model = Sequential()
model.add(Merge([sentrnn, qrnn], mode='sum'))
model.add(RNN(EMBED_HIDDEN_SIZE, return_sequences=False))
model.add(Dropout(0.3))
model.add(Dense(vocab_size, activation='softmax'))

model = make_model(model,
                   optimizer='adam',
                   loss='categorical_crossentropy',
                   metrics=['accuracy'])

print('Training')


def train_func():
    model.fit([X, Xq], Y,
              batch_size=BATCH_SIZE,
              nb_epoch=EPOCHS,
              validation_split=0.05)


ret = profile(train_func)
def train(run_name, start_epoch, stop_epoch, img_w):
    # Input Parameters
    img_h = 64
    words_per_epoch = 16000
    val_split = 0.2
    val_words = int(words_per_epoch * val_split)

    # Network parameters
    conv_num_filters = 16
    filter_size = 3
    pool_size = 2
    time_dense_size = 32
    rnn_size = 512

    if K.image_dim_ordering() == 'th':
        input_shape = (1, img_w, img_h)
    else:
        input_shape = (img_w, img_h, 1)

    fdir = os.path.dirname(
        get_file('wordlists.tgz',
                 origin='http://www.isosemi.com/datasets/wordlists.tgz',
                 untar=True))

    img_gen = TextImageGenerator(
        monogram_file=os.path.join(fdir, 'wordlist_mono_clean.txt'),
        bigram_file=os.path.join(fdir, 'wordlist_bi_clean.txt'),
        minibatch_size=32,
        img_w=img_w,
        img_h=img_h,
        downsample_factor=(pool_size**2),
        val_split=words_per_epoch - val_words)

    act = 'relu'
    input_data = Input(name='the_input', shape=input_shape, dtype='float32')
    inner = Convolution2D(conv_num_filters, filter_size, filter_size,
                          border_mode='same',
                          activation=act,
                          init='he_normal',
                          name='conv1')(input_data)
    inner = MaxPooling2D(pool_size=(pool_size, pool_size),
                         name='max1')(inner)
    inner = Convolution2D(conv_num_filters, filter_size, filter_size,
                          border_mode='same',
                          activation=act,
                          init='he_normal',
                          name='conv2')(inner)
    inner = MaxPooling2D(pool_size=(pool_size, pool_size),
                         name='max2')(inner)

    conv_to_rnn_dims = (img_w // (pool_size**2),
                        (img_h // (pool_size**2)) * conv_num_filters)
    inner = Reshape(target_shape=conv_to_rnn_dims, name='reshape')(inner)

    # cuts down input size going into RNN:
    inner = Dense(time_dense_size, activation=act, name='dense1')(inner)

    # Two layers of bidirectional GRUs
    # GRU seems to work as well as, if not better than, LSTM:
    gru_1 = GRU(rnn_size, return_sequences=True,
                init='he_normal', name='gru1')(inner)
    gru_1b = GRU(rnn_size, return_sequences=True, go_backwards=True,
                 init='he_normal', name='gru1_b')(inner)
    gru1_merged = merge([gru_1, gru_1b], mode='sum')
    gru_2 = GRU(rnn_size, return_sequences=True,
                init='he_normal', name='gru2')(gru1_merged)
    gru_2b = GRU(rnn_size, return_sequences=True, go_backwards=True,
                 init='he_normal', name='gru2_b')(gru1_merged)

    # transforms RNN output to character activations:
    inner = Dense(img_gen.get_output_size(), init='he_normal',
                  name='dense2')(merge([gru_2, gru_2b], mode='concat'))
    y_pred = Activation('softmax', name='softmax')(inner)
    Model(input=[input_data], output=y_pred).summary()

    labels = Input(name='the_labels',
                   shape=[img_gen.absolute_max_string_len],
                   dtype='float32')
    input_length = Input(name='input_length', shape=[1], dtype='int64')
    label_length = Input(name='label_length', shape=[1], dtype='int64')
    # Keras doesn't currently support loss funcs with extra parameters,
    # so CTC loss is implemented in a lambda layer
    loss_out = Lambda(ctc_lambda_func, output_shape=(1, ),
                      name='ctc')([y_pred, labels, input_length, label_length])

    # clipnorm seems to speed up convergence
    sgd = SGD(lr=0.02, decay=1e-6, momentum=0.9, nesterov=True, clipnorm=5)

    model = Model(input=[input_data, labels, input_length, label_length],
                  output=[loss_out])

    # the loss calc occurs elsewhere, so use a dummy lambda func for the loss
    model = make_model(model,
                       loss={'ctc': lambda y_true, y_pred: y_pred},
                       optimizer=sgd)
    if start_epoch > 0:
        weight_file = os.path.join(
            OUTPUT_DIR,
            os.path.join(run_name, 'weights%02d.h5' % (start_epoch - 1)))
        model.load_weights(weight_file)

    # captures output of softmax so we can decode the output during
    # visualization
    test_func = K.function([input_data], [y_pred])

    viz_cb = VizCallback(run_name, test_func, img_gen.next_val())

    def train_func():
        history = model.fit_generator(
            generator=img_gen.next_train(),
            samples_per_epoch=(words_per_epoch - val_words),
            nb_epoch=stop_epoch,
            validation_data=img_gen.next_val(),
            nb_val_samples=val_words,
            callbacks=[viz_cb, img_gen],
            initial_epoch=start_epoch)
        ret_dict["training_accuracy"] = history.history['acc'][-1]
        ret_dict["test_accuracy"] = history.history['val_acc'][-1]

    ret = profile(train_func)
    ret_dict["training_time"] = str(ret[0]) + ' sec'
    ret_dict["max_memory"] = str(ret[1]) + ' MB'
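# `ctc_lambda_func` is defined elsewhere in the script; a sketch along the
# lines of the standard Keras image_ocr example (discarding the first two RNN
# time steps is that example's convention and is assumed here):
from keras import backend as K


def ctc_lambda_func(args):
    y_pred, labels, input_length, label_length = args
    # the first couple of RNN outputs tend to be garbage
    y_pred = y_pred[:, 2:, :]
    return K.ctc_batch_cost(labels, y_pred, input_length, label_length)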