def train_model(model, train, test, nb_classes):
    X_train = train[0].reshape((train[0].shape[0], ) + input_shape)
    X_test = test[0].reshape((test[0].shape[0], ) + input_shape)
    X_train = X_train.astype('float32')
    X_test = X_test.astype('float32')
    X_train /= 255
    X_test /= 255
    print('X_train shape:', X_train.shape)
    print(X_train.shape[0], 'train samples')
    print(X_test.shape[0], 'test samples')

    # convert class vectors to binary class matrices
    Y_train = np_utils.to_categorical(train[1], nb_classes)
    Y_test = np_utils.to_categorical(test[1], nb_classes)

    model = make_model(model,
                       loss='categorical_crossentropy',
                       optimizer='adadelta',
                       metrics=['accuracy'])

    t = now()
    history = model.fit(X_train,
                        Y_train,
                        batch_size=batch_size,
                        nb_epoch=nb_epoch,
                        verbose=1,
                        validation_data=(X_test, Y_test))
    print('Training time: %s' % (now() - t))
    score = model.evaluate(X_test, Y_test, verbose=0)
    print('Test score:', score[0])
    print('Test accuracy:', score[1])
    return history
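
# A minimal usage sketch for train_model, assuming the globals used above
# (input_shape, batch_size, nb_epoch) are defined elsewhere in this example;
# the tiny model below is illustrative, not the one benchmarked here.
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import Dense, Flatten

(X_tr, y_tr), (X_te, y_te) = mnist.load_data()
demo_model = Sequential()
demo_model.add(Flatten(input_shape=input_shape))
demo_model.add(Dense(10, activation='softmax'))
history = train_model(demo_model, (X_tr, y_tr), (X_te, y_te), nb_classes=10)
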
Example #2
def make_teacher_model(train_data, validation_data, nb_epoch=3):
    '''Train a simple CNN as teacher model.
    '''
    model = Sequential()
    model.add(
        Conv2D(64,
               3,
               3,
               input_shape=input_shape,
               border_mode='same',
               name='conv1'))
    model.add(MaxPooling2D(name='pool1'))
    model.add(Conv2D(64, 3, 3, border_mode='same', name='conv2'))
    model.add(MaxPooling2D(name='pool2'))
    model.add(Flatten(name='flatten'))
    model.add(Dense(64, activation='relu', name='fc1'))
    model.add(Dense(nb_class, activation='softmax', name='fc2'))
    model = make_model(model,
                       loss='categorical_crossentropy',
                       optimizer=SGD(lr=0.01, momentum=0.9),
                       metrics=['accuracy'])

    train_x, train_y = train_data
    history = model.fit(train_x,
                        train_y,
                        nb_epoch=nb_epoch,
                        validation_data=validation_data)
    return model, history
Example #3
def make_wider_student_model(teacher_model,
                             train_data,
                             validation_data,
                             init,
                             nb_epoch=3):
    '''Train a wider student model based on teacher_model,
       with either 'random-pad' (baseline) or 'net2wider'
    '''
    new_conv1_width = 128
    new_fc1_width = 128

    model = Sequential()
    # a wider conv1 compared to teacher_model
    model.add(
        Conv2D(new_conv1_width,
               3,
               3,
               input_shape=input_shape,
               border_mode='same',
               name='conv1'))
    model.add(MaxPooling2D(name='pool1'))
    model.add(Conv2D(64, 3, 3, border_mode='same', name='conv2'))
    model.add(MaxPooling2D(name='pool2'))
    model.add(Flatten(name='flatten'))
    # a wider fc1 compared to teacher model
    model.add(Dense(new_fc1_width, activation='relu', name='fc1'))
    model.add(Dense(nb_class, activation='softmax', name='fc2'))

    # The weights for other layers need to be copied from teacher_model
    # to student_model, except for widened layers
    # and their immediate downstreams, which will be initialized separately.
    # For this example there are no other layers that need to be copied.

    w_conv1, b_conv1 = teacher_model.get_layer('conv1').get_weights()
    w_conv2, b_conv2 = teacher_model.get_layer('conv2').get_weights()
    new_w_conv1, new_b_conv1, new_w_conv2 = wider2net_conv2d(
        w_conv1, b_conv1, w_conv2, new_conv1_width, init)
    model.get_layer('conv1').set_weights([new_w_conv1, new_b_conv1])
    model.get_layer('conv2').set_weights([new_w_conv2, b_conv2])

    w_fc1, b_fc1 = teacher_model.get_layer('fc1').get_weights()
    w_fc2, b_fc2 = teacher_model.get_layer('fc2').get_weights()
    new_w_fc1, new_b_fc1, new_w_fc2 = wider2net_fc(w_fc1, b_fc1, w_fc2,
                                                   new_fc1_width, init)
    model.get_layer('fc1').set_weights([new_w_fc1, new_b_fc1])
    model.get_layer('fc2').set_weights([new_w_fc2, b_fc2])

    model = make_model(model,
                       loss='categorical_crossentropy',
                       optimizer=SGD(lr=0.001, momentum=0.9),
                       metrics=['accuracy'])

    train_x, train_y = train_data
    history = model.fit(train_x,
                        train_y,
                        nb_epoch=nb_epoch,
                        validation_data=validation_data)
    return model, history
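
# wider2net_fc and wider2net_conv2d are not shown in this snippet. Below is a
# minimal sketch of the fully connected case only, following the Net2WiderNet
# idea (replicate randomly chosen units and split their outgoing weights so the
# network's function is preserved); it illustrates the technique and is not
# necessarily the exact helper used above.
import numpy as np

def wider2net_fc_sketch(teacher_w1, teacher_b1, teacher_w2, new_width, init):
    '''Widen a dense layer from teacher_w1.shape[1] units to new_width units.

    teacher_w1: (in_dim, n) weights of the layer being widened
    teacher_b1: (n,) its biases
    teacher_w2: (n, out_dim) weights of the next layer
    '''
    n = teacher_w1.shape[1]
    assert new_width > n, 'new width must exceed the teacher width'
    extra = np.random.choice(n, size=new_width - n)  # units to replicate
    new_w1 = np.concatenate((teacher_w1, teacher_w1[:, extra]), axis=1)
    new_b1 = np.concatenate((teacher_b1, teacher_b1[extra]))
    new_w2 = np.concatenate((teacher_w2, teacher_w2[extra]), axis=0)
    if init == 'random-pad':
        # baseline: the padded outgoing rows are just small random values
        new_w2[n:] = np.random.normal(0, 0.1,
                                      size=(new_width - n, teacher_w2.shape[1]))
    elif init == 'net2wider':
        # function-preserving: each replicated unit's outgoing weights are
        # divided by its total number of copies
        factors = np.bincount(extra, minlength=n) + 1.
        new_w2[:n] /= factors[:, np.newaxis]
        new_w2[n:] /= factors[extra][:, np.newaxis]
    else:
        raise ValueError('Unsupported weight initializer: %s' % init)
    return new_w1, new_b1, new_w2
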
Example #4
def make_mod(dense_layer_sizes, nb_filters, nb_conv, nb_pool):
    '''Creates a model consisting of 2 convolutional layers followed by dense layers

    dense_layer_sizes: List of layer sizes. This list has one number for each layer
    nb_filters: Number of convolutional filters in each convolutional layer
    nb_conv: Convolutional kernel size
    nb_pool: Size of pooling area for max pooling
    '''

    model = Sequential()

    model.add(
        Convolution2D(nb_filters,
                      nb_conv,
                      nb_conv,
                      border_mode='valid',
                      input_shape=input_shape))
    model.add(Activation('relu'))
    model.add(Convolution2D(nb_filters, nb_conv, nb_conv))
    model.add(Activation('relu'))
    model.add(MaxPooling2D(pool_size=(nb_pool, nb_pool)))
    model.add(Dropout(0.25))

    model.add(Flatten())
    for layer_size in dense_layer_sizes:
        model.add(Dense(layer_size))
        model.add(Activation('relu'))
    model.add(Dropout(0.5))
    model.add(Dense(nb_classes))
    model.add(Activation('softmax'))

    model = make_model(model,
                       loss='categorical_crossentropy',
                       optimizer='adadelta',
                       metrics=['accuracy'])

    return model
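
# make_mod takes its hyperparameters as arguments, which makes it usable as the
# build function for a scikit-learn style grid search. A minimal usage sketch;
# the parameter grid is illustrative, and X_train / y_train are assumed to exist
# as in the surrounding examples.
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.grid_search import GridSearchCV  # sklearn.model_selection in newer versions

clf = KerasClassifier(make_mod, batch_size=32)
validator = GridSearchCV(clf,
                         param_grid={'dense_layer_sizes': [[32], [64, 64]],
                                     'nb_filters': [8],
                                     'nb_conv': [3],
                                     'nb_pool': [2],
                                     'nb_epoch': [3]},
                         n_jobs=1)
validator.fit(X_train, y_train)
print('Best parameters:', validator.best_params_)
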
# Result dictionary
global ret_dict
ret_dict = dict()
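
# profile() is called throughout these snippets but not defined here. A minimal
# sketch of such a helper, assuming it returns (wall-clock seconds, peak memory
# in MB), which is how its return value is unpacked below; the real helper may
# be implemented differently.
import time
from memory_profiler import memory_usage

def profile(func):
    start = time.time()
    mem_samples = memory_usage((func, (), {}))  # runs func, sampling RSS in MB
    elapsed = time.time() - start
    return elapsed, max(mem_samples)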


def vae_loss(x, x_decoded_mean):
    xent_loss = original_dim * objectives.binary_crossentropy(
        x, x_decoded_mean)
    kl_loss = -0.5 * K.sum(1 + z_log_var - K.square(z_mean) - K.exp(z_log_var),
                           axis=-1)
    return xent_loss + kl_loss
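
# The encoder/decoder graph that defines x, z_mean, z_log_var and x_decoded_mean
# is not shown in this snippet. A minimal sketch of the usual reparameterization
# trick; the layer sizes are illustrative and may differ from the benchmarked model.
from keras.layers import Input, Dense, Lambda
from keras import backend as K

batch_size, original_dim, intermediate_dim, latent_dim = 100, 784, 256, 2

x = Input(batch_shape=(batch_size, original_dim))
h = Dense(intermediate_dim, activation='relu')(x)
z_mean = Dense(latent_dim)(h)
z_log_var = Dense(latent_dim)(h)

def sampling(args):
    # draw z = mean + sigma * epsilon, with epsilon ~ N(0, 1)
    z_mean, z_log_var = args
    epsilon = K.random_normal(shape=(batch_size, latent_dim))
    return z_mean + K.exp(z_log_var / 2) * epsilon

z = Lambda(sampling, output_shape=(latent_dim,))([z_mean, z_log_var])
h_decoded = Dense(intermediate_dim, activation='relu')(z)
x_decoded_mean = Dense(original_dim, activation='sigmoid')(h_decoded)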


vae = Model(x, x_decoded_mean)
vae = make_model(vae, optimizer='rmsprop', loss=vae_loss)

# train the VAE on MNIST digits
(x_train, y_train), (x_test, y_test) = mnist.load_data()

x_train = x_train.astype('float32') / 255.
x_test = x_test.astype('float32') / 255.
x_train = x_train.reshape((len(x_train), np.prod(x_train.shape[1:])))
x_test = x_test.reshape((len(x_test), np.prod(x_test.shape[1:])))


def train_func():
    history = vae.fit(x_train,
                      x_train,
                      shuffle=True,
                      nb_epoch=nb_epoch,
                      batch_size=batch_size,
                      validation_data=(x_test, x_test))

X = np.zeros((len(sentences), maxlen, len(chars)), dtype=np.bool)
y = np.zeros((len(sentences), len(chars)), dtype=np.bool)
for i, sentence in enumerate(sentences):
    for t, char in enumerate(sentence):
        X[i, t, char_indices[char]] = 1
    y[i, char_indices[next_chars[i]]] = 1

# build the model: a single LSTM
print('Build model...')
model = Sequential()
model.add(LSTM(128, input_shape=(maxlen, len(chars))))
model.add(Dense(len(chars)))
model.add(Activation('softmax'))

optimizer = RMSprop(lr=0.01)
model = make_model(model, loss='categorical_crossentropy', optimizer=optimizer)


def sample(preds, temperature=1.0):
    # helper function to sample an index from a probability array
    preds = np.asarray(preds).astype('float64')
    preds = np.log(preds) / temperature
    exp_preds = np.exp(preds)
    preds = exp_preds / np.sum(exp_preds)
    probas = np.random.multinomial(1, preds, 1)
    return np.argmax(probas)
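
# Quick illustration of sample(): a low temperature makes the draw nearly greedy,
# a high temperature pushes it towards uniform. The probability vector is made up.
demo_preds = np.array([0.1, 0.6, 0.3])
print(sample(demo_preds, temperature=0.2))  # almost always index 1
print(sample(demo_preds, temperature=1.5))  # noticeably more diverse draws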


# train the model, output generated text after each iteration
def train_func():
    for iteration in range(1, 60):
        print('Iteration', iteration)
        model.fit(X, y, batch_size=128, nb_epoch=1)

Y_test = np_utils.to_categorical(y_test, nb_classes)

model = Sequential()
model.add(Dense(512, input_shape=(784, )))
model.add(Activation('relu'))
model.add(Dropout(0.2))
model.add(Dense(512))
model.add(Activation('relu'))
model.add(Dropout(0.2))
model.add(Dense(10))
model.add(Activation('softmax'))

model.summary()

model = make_model(model,
                   loss='categorical_crossentropy',
                   optimizer=SGD(),
                   metrics=['accuracy'])


def train_func():
    history = model.fit(X_train,
                        Y_train,
                        batch_size=batch_size,
                        nb_epoch=nb_epoch,
                        verbose=1,
                        validation_data=(X_test, Y_test))
    ret_dict["training_accuracy"] = history.history['acc'][-1]
    ret_dict["test_accuracy"] = history.history['val_acc'][-1]


ret = profile(train_func)

# encoder: residual conv blocks + max pooling, storing the pooling "where"
# masks so the decoder can unpool into the right positions
for i in range(nlayers):
    y_prepool = convresblock(y, nfeats=nfeats_all[i + 1], ksize=ksize)
    y = MaxPooling2D(pool_size=(pool_sizes[i], pool_sizes[i]))(y_prepool)
    wheres[i] = merge([y_prepool, y], mode=getwhere,
                      output_shape=lambda x: x[0])

# Now build the decoder, and use the stored "where" masks to place the features
for i in range(nlayers):
    ind = nlayers - 1 - i
    y = UpSampling2D(size=(pool_sizes[ind], pool_sizes[ind]))(y)
    y = merge([y, wheres[ind]], mode='mul')
    y = convresblock(y, nfeats=nfeats_all[ind], ksize=ksize)

# Use hard_sigmoid to clip the range of the reconstruction
y = Activation('hard_sigmoid')(y)

# Define the model and its mean squared error loss, and compile it with Adam
model = Model(img_input, y)
model = make_model(model, optimizer='adam', loss='mse')

# Fit the model
def train_func():
    history = model.fit(X_train, X_train,
                        validation_data=(X_test, X_test),
                        batch_size=batch_size, nb_epoch=nb_epoch)
    ret_dict["training_accuracy"] = history.history['acc'][-1]
    ret_dict["test_accuracy"] = history.history['val_acc'][-1]
ret = profile(train_func)

ret_dict["training_time"] = str(ret[0]) + ' sec'
ret_dict["max_memory"] = str(ret[1]) + ' MB'

# Plot
X_recon = model.predict(X_test[:25])
X_plot = np.concatenate((X_test[:25], X_recon), axis=1)
Example #9
input_a = Input(shape=(input_dim,))
input_b = Input(shape=(input_dim,))

# because we re-use the same instance `base_network`,
# the weights of the network
# will be shared across the two branches
processed_a = base_network(input_a)
processed_b = base_network(input_b)

distance = Lambda(euclidean_distance, output_shape=eucl_dist_output_shape)([processed_a, processed_b])

model = Model(input=[input_a, input_b], output=distance)

# train
rms = RMSprop()
model = make_model(model, loss=contrastive_loss, optimizer=rms)
def train_func():
    history = model.fit([tr_pairs[:, 0], tr_pairs[:, 1]], tr_y,
                        validation_data=([te_pairs[:, 0], te_pairs[:, 1]], te_y),
                        batch_size=128,
                        nb_epoch=nb_epoch)
    ret_dict["training_accuracy"] = history.history['acc'][-1]
    ret_dict["test_accuracy"] = history.history['val_acc'][-1]
ret = profile(train_func)

ret_dict["training_time"] = str(ret[0]) + ' sec'
ret_dict["max_memory"] = str(ret[1]) + ' MB'

# compute final accuracy on training and test sets
pred = model.predict([tr_pairs[:, 0], tr_pairs[:, 1]])
tr_acc = compute_accuracy(pred, tr_y)
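
# The helpers referenced above (euclidean_distance, eucl_dist_output_shape,
# contrastive_loss, compute_accuracy) are not shown in this snippet. A minimal
# sketch of what they typically look like for a siamese network; the margin and
# the decision threshold here are illustrative.
from keras import backend as K
import numpy as np

def euclidean_distance(vects):
    a, b = vects
    return K.sqrt(K.sum(K.square(a - b), axis=1, keepdims=True))

def eucl_dist_output_shape(shapes):
    shape1, shape2 = shapes
    return (shape1[0], 1)

def contrastive_loss(y_true, y_pred):
    # similar pairs (y_true = 1) are pulled together, dissimilar pairs are
    # pushed apart up to a margin
    margin = 1
    return K.mean(y_true * K.square(y_pred) +
                  (1 - y_true) * K.square(K.maximum(margin - y_pred, 0)))

def compute_accuracy(predictions, labels):
    # classify a pair as "same" when the predicted distance is below 0.5
    return np.mean((predictions.ravel() < 0.5) == (labels == 1))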
Example #10
def make_deeper_student_model(teacher_model,
                              train_data,
                              validation_data,
                              init,
                              nb_epoch=3):
    '''Train a deeper student model based on teacher_model,
       with either 'random-init' (baseline) or 'net2deeper'
    '''
    model = Sequential()
    model.add(
        Conv2D(64,
               3,
               3,
               input_shape=input_shape,
               border_mode='same',
               name='conv1'))
    model.add(MaxPooling2D(name='pool1'))
    model.add(Conv2D(64, 3, 3, border_mode='same', name='conv2'))
    # add another conv2d layer to make original conv2 deeper
    if init == 'net2deeper':
        prev_w, _ = model.get_layer('conv2').get_weights()
        new_weights = deeper2net_conv2d(prev_w)
        model.add(
            Conv2D(64,
                   3,
                   3,
                   border_mode='same',
                   name='conv2-deeper',
                   weights=new_weights))
    elif init == 'random-init':
        model.add(Conv2D(64, 3, 3, border_mode='same', name='conv2-deeper'))
    else:
        raise ValueError('Unsupported weight initializer: %s' % init)
    model.add(MaxPooling2D(name='pool2'))
    model.add(Flatten(name='flatten'))
    model.add(Dense(64, activation='relu', name='fc1'))
    # add another fc layer to make original fc1 deeper
    if init == 'net2deeper':
        # net2deeper for an fc layer with relu is just an identity initializer
        model.add(
            Dense(64, init='identity', activation='relu', name='fc1-deeper'))
    elif init == 'random-init':
        model.add(Dense(64, activation='relu', name='fc1-deeper'))
    else:
        raise ValueError('Unsupported weight initializer: %s' % init)
    model.add(Dense(nb_class, activation='softmax', name='fc2'))

    # copy weights for other layers
    copy_weights(teacher_model,
                 model,
                 layer_names=['conv1', 'conv2', 'fc1', 'fc2'])

    model = make_model(model,
                       loss='categorical_crossentropy',
                       optimizer=SGD(lr=0.001, momentum=0.9),
                       metrics=['accuracy'])

    train_x, train_y = train_data
    history = model.fit(train_x,
                        train_y,
                        nb_epoch=nb_epoch,
                        validation_data=validation_data)
    return model, history
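
# deeper2net_conv2d is not shown in this snippet. A minimal sketch of the
# Net2DeeperNet idea for a conv layer followed by relu: the inserted layer is
# initialized as an identity mapping so the network's function is preserved.
# The weight layout below assumes TensorFlow dim ordering (kh, kw, in, out);
# a Theano-ordered model would need the axes rearranged.
import numpy as np

def deeper2net_conv2d_sketch(teacher_w):
    kh, kw, _, filters = teacher_w.shape
    student_w = np.zeros((kh, kw, filters, filters))
    for i in range(filters):
        # a 1 at the kernel centre maps channel i straight to channel i
        student_w[(kh - 1) // 2, (kw - 1) // 2, i, i] = 1.
    student_b = np.zeros(filters)
    return [student_w, student_b]
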
for i in range(len(cos) - lahead):
    expected_output[i, 0] = np.mean(cos[i + 1:i + lahead + 1])

print('Output shape')
print(expected_output.shape)

print('Creating Model')
model = Sequential()
model.add(
    LSTM(50,
         batch_input_shape=(batch_size, tsteps, 1),
         return_sequences=True,
         stateful=True))
model.add(LSTM(50, return_sequences=False, stateful=True))
model.add(Dense(1))
model = make_model(model, loss='mse', optimizer='rmsprop')

print('Training')


def train_func():
    for i in range(epochs):
        print('Epoch', i, '/', epochs)
        history = model.fit(cos,
                            expected_output,
                            batch_size=batch_size,
                            verbose=1,
                            nb_epoch=1,
                            shuffle=False)
        ret_dict["training_accuracy"] = history.history['acc'][-1]
        ret_dict["test_accuracy"] = history.history['val_acc'][-1]

seq = Sequential()
seq.add(ConvLSTM2D(nb_filter=40, nb_row=3, nb_col=3,
                   input_shape=(None, 40, 40, 1),
                   border_mode='same', return_sequences=True))
seq.add(BatchNormalization())

seq.add(ConvLSTM2D(nb_filter=40, nb_row=3, nb_col=3,
                   border_mode='same', return_sequences=True))
seq.add(BatchNormalization())

seq.add(ConvLSTM2D(nb_filter=40, nb_row=3, nb_col=3,
                   border_mode='same', return_sequences=True))
seq.add(BatchNormalization())

seq.add(Convolution3D(nb_filter=1, kernel_dim1=1, kernel_dim2=3,
                      kernel_dim3=3, activation='sigmoid',
                      border_mode='same', dim_ordering='tf'))

seq = make_model(seq, loss='binary_crossentropy', optimizer='adadelta')


# Artificial data generation:
# Generate movies with 3 to 7 moving squares inside.
# The squares are 1x1 or 2x2 pixels in size and move linearly over time.
# For convenience, we first create movies with a bigger width and height (80x80)
# and at the end select a 40x40 window.

def generate_movies(n_samples=1200, n_frames=15):
    row = 80
    col = 80
    noisy_movies = np.zeros((n_samples, n_frames, row, col, 1), dtype=np.float)
    shifted_movies = np.zeros((n_samples, n_frames, row, col, 1),
                              dtype=np.float)
print('Training model.')

# train a 1D convnet with global maxpooling
sequence_input = Input(shape=(MAX_SEQUENCE_LENGTH,), dtype='int32')
embedded_sequences = embedding_layer(sequence_input)
x = Conv1D(128, 5, activation='relu')(embedded_sequences)
x = MaxPooling1D(5)(x)
x = Conv1D(128, 5, activation='relu')(x)
x = MaxPooling1D(5)(x)
x = Conv1D(128, 5, activation='relu')(x)
x = MaxPooling1D(35)(x)
x = Flatten()(x)
x = Dense(128, activation='relu')(x)
preds = Dense(len(labels_index), activation='softmax')(x)

model = Model(sequence_input, preds)
model = make_model(model, loss='categorical_crossentropy',
                   optimizer='rmsprop',
                   metrics=['acc'])

def train_func():
    # happy learning!
    history = model.fit(x_train, y_train, validation_data=(x_val, y_val),
                        nb_epoch=2, batch_size=128)
    ret_dict["training_accuracy"] = history.history['acc'][-1]
    ret_dict["test_accuracy"] = history.history['val_acc'][-1]
ret = profile(train_func)

ret_dict["training_time"] = str(ret[0]) + ' sec'
ret_dict["max_memory"] = str(ret[1]) + ' MB'
Example #14
# convert class vectors to binary class matrices
Y_train = np_utils.to_categorical(y_train, nb_classes)
Y_test = np_utils.to_categorical(y_test, nb_classes)

model = Sequential()
model.add(Dense(512, input_shape=(784,)))
model.add(Activation('relu'))
model.add(Dropout(0.2))
model.add(Dense(512))
model.add(Activation('relu'))
model.add(Dropout(0.2))
model.add(Dense(10))
model.add(Activation('softmax'))

model.summary()
model = make_model(model,
                   loss='categorical_crossentropy',
                   optimizer=SGD(),
                   metrics=['accuracy'])

def train_model():
    history = model.fit(X_train, Y_train,
                    batch_size=batch_size, nb_epoch=nb_epoch,
                    verbose=1, validation_data=(X_test, Y_test))
    profile_output['TRAIN_ACCURACY'] = history.history['acc'][-1]

def test_run():
    # Calling training and profile memory usage
    profile_output["MODEL"] = "MNIST MLP"
    run_time, memory_usage = profile(train_model)

    profile_output['TRAINING_TIME'] = float(run_time)
    profile_output['MEM_CONSUMPTION'] = float(memory_usage)
Example #15
# concatenate the match vector with the question vector,
# and do logistic regression on top
answer = Sequential()
answer.add(Merge([response, question_encoder], mode='concat', concat_axis=-1))
# the original paper uses a matrix multiplication for this reduction step.
# we choose to use an RNN instead.
answer.add(LSTM(32))
# one regularization layer -- more would probably be needed.
answer.add(Dropout(0.3))
answer.add(Dense(vocab_size))
# we output a probability distribution over the vocabulary
answer.add(Activation('softmax'))

answer = make_model(answer,
                    optimizer='rmsprop',
                    loss='categorical_crossentropy',
                    metrics=['accuracy'])


# Note: you could use a Graph model to avoid repeating the input twice
def train_func():
    history = answer.fit(
        [inputs_train, queries_train, inputs_train],
        answers_train,
        batch_size=32,
        nb_epoch=120,
        validation_data=([inputs_test, queries_test,
                          inputs_test], answers_test))
    ret_dict["training_accuracy"] = history.history['acc'][-1]
    ret_dict["test_accuracy"] = history.history['val_acc'][-1]
Example #16
X_train = sequence.pad_sequences(X_train, maxlen=maxlen)
X_test = sequence.pad_sequences(X_test, maxlen=maxlen)
print('X_train shape:', X_train.shape)
print('X_test shape:', X_test.shape)

print('Build model...')
model = Sequential()
model.add(Embedding(max_features, 128, dropout=0.2))
model.add(LSTM(128, dropout_W=0.2,
               dropout_U=0.2))  # try using a GRU instead, for fun
model.add(Dense(1))
model.add(Activation('sigmoid'))

# try using different optimizers and different optimizer configs
model = make_model(model,
                   loss='binary_crossentropy',
                   optimizer='adam',
                   metrics=['accuracy'])

print('Train...')


def train_func():
    model.fit(X_train,
              y_train,
              batch_size=batch_size,
              nb_epoch=15,
              validation_data=(X_test, y_test))


ret = profile(train_func)

sentrnn = Sequential()
sentrnn.add(Embedding(vocab_size, EMBED_HIDDEN_SIZE,
                      input_length=story_maxlen))
sentrnn.add(Dropout(0.3))

qrnn = Sequential()
qrnn.add(Embedding(vocab_size, EMBED_HIDDEN_SIZE, input_length=query_maxlen))
qrnn.add(Dropout(0.3))
qrnn.add(RNN(EMBED_HIDDEN_SIZE, return_sequences=False))
qrnn.add(RepeatVector(story_maxlen))

model = Sequential()
model.add(Merge([sentrnn, qrnn], mode='sum'))
model.add(RNN(EMBED_HIDDEN_SIZE, return_sequences=False))
model.add(Dropout(0.3))
model.add(Dense(vocab_size, activation='softmax'))

model = make_model(model,
                   optimizer='adam',
                   loss='categorical_crossentropy',
                   metrics=['accuracy'])

print('Training')


def train_func():
    model.fit([X, Xq],
              Y,
              batch_size=BATCH_SIZE,
              nb_epoch=EPOCHS,
              validation_split=0.05)


ret = profile(train_func)
def train(run_name, start_epoch, stop_epoch, img_w):
    # Input Parameters
    img_h = 64
    words_per_epoch = 16000
    val_split = 0.2
    val_words = int(words_per_epoch * (val_split))

    # Network parameters
    conv_num_filters = 16
    filter_size = 3
    pool_size = 2
    time_dense_size = 32
    rnn_size = 512

    if K.image_dim_ordering() == 'th':
        input_shape = (1, img_w, img_h)
    else:
        input_shape = (img_w, img_h, 1)

    fdir = os.path.dirname(
        get_file('wordlists.tgz',
                 origin='http://www.isosemi.com/datasets/wordlists.tgz',
                 untar=True))

    img_gen = TextImageGenerator(
        monogram_file=os.path.join(fdir, 'wordlist_mono_clean.txt'),
        bigram_file=os.path.join(fdir, 'wordlist_bi_clean.txt'),
        minibatch_size=32,
        img_w=img_w,
        img_h=img_h,
        downsample_factor=(pool_size**2),
        val_split=words_per_epoch - val_words)
    act = 'relu'
    input_data = Input(name='the_input', shape=input_shape, dtype='float32')
    inner = Convolution2D(conv_num_filters,
                          filter_size,
                          filter_size,
                          border_mode='same',
                          activation=act,
                          init='he_normal',
                          name='conv1')(input_data)
    inner = MaxPooling2D(pool_size=(pool_size, pool_size), name='max1')(inner)
    inner = Convolution2D(conv_num_filters,
                          filter_size,
                          filter_size,
                          border_mode='same',
                          activation=act,
                          init='he_normal',
                          name='conv2')(inner)
    inner = MaxPooling2D(pool_size=(pool_size, pool_size), name='max2')(inner)

    conv_to_rnn_dims = (img_w // (pool_size**2),
                        (img_h // (pool_size**2)) * conv_num_filters)
    inner = Reshape(target_shape=conv_to_rnn_dims, name='reshape')(inner)

    # cuts down input size going into RNN:
    inner = Dense(time_dense_size, activation=act, name='dense1')(inner)

    # Two layers of bidirectional GRUs
    # GRUs seem to work as well as, if not better than, LSTMs:
    gru_1 = GRU(rnn_size, return_sequences=True, init='he_normal',
                name='gru1')(inner)
    gru_1b = GRU(rnn_size,
                 return_sequences=True,
                 go_backwards=True,
                 init='he_normal',
                 name='gru1_b')(inner)
    gru1_merged = merge([gru_1, gru_1b], mode='sum')
    gru_2 = GRU(rnn_size, return_sequences=True, init='he_normal',
                name='gru2')(gru1_merged)
    gru_2b = GRU(rnn_size,
                 return_sequences=True,
                 go_backwards=True,
                 init='he_normal',
                 name='gru2_b')(gru1_merged)

    # transforms RNN output to character activations:
    inner = Dense(img_gen.get_output_size(), init='he_normal',
                  name='dense2')(merge([gru_2, gru_2b], mode='concat'))
    y_pred = Activation('softmax', name='softmax')(inner)
    Model(input=[input_data], output=y_pred).summary()

    labels = Input(name='the_labels',
                   shape=[img_gen.absolute_max_string_len],
                   dtype='float32')
    input_length = Input(name='input_length', shape=[1], dtype='int64')
    label_length = Input(name='label_length', shape=[1], dtype='int64')
    # Keras doesn't currently support loss funcs with extra parameters
    # so CTC loss is implemented in a lambda layer
    loss_out = Lambda(ctc_lambda_func, output_shape=(1, ),
                      name='ctc')([y_pred, labels, input_length, label_length])

    # clipnorm seems to speed up convergence
    sgd = SGD(lr=0.02, decay=1e-6, momentum=0.9, nesterov=True, clipnorm=5)

    model = Model(input=[input_data, labels, input_length, label_length],
                  output=[loss_out])

    # the loss calc occurs elsewhere, so use a dummy lambda func for the loss
    model = make_model(model,
                       loss={
                           'ctc': lambda y_true, y_pred: y_pred
                       },
                       optimizer=sgd)
    if start_epoch > 0:
        weight_file = os.path.join(
            OUTPUT_DIR,
            os.path.join(run_name, 'weights%02d.h5' % (start_epoch - 1)))
        model.load_weights(weight_file)
    # captures output of softmax so we can decode the output during visualization
    test_func = K.function([input_data], [y_pred])

    viz_cb = VizCallback(run_name, test_func, img_gen.next_val())

    def train_func():
        history = model.fit_generator(generator=img_gen.next_train(),
                                      samples_per_epoch=(words_per_epoch -
                                                         val_words),
                                      nb_epoch=stop_epoch,
                                      validation_data=img_gen.next_val(),
                                      nb_val_samples=val_words,
                                      callbacks=[viz_cb, img_gen],
                                      initial_epoch=start_epoch)
        ret_dict["training_accuracy"] = history.history['acc'][-1]
        ret_dict["test_accuracy"] = history.history['val_acc'][-1]

    ret = profile(train_func)

    ret_dict["training_time"] = str(ret[0]) + ' sec'
    ret_dict["max_memory"] = str(ret[1]) + ' MB'