Exemplo n.º 1
0
def define_epinet(sz_input,sz_input2,view_n,conv_depth,filt_num,learning_rate):
    """Build and compile the 4-stream EPINET.

    Each angular direction (90d, 0d, 45d, M45d) gets its own input stack and
    stream; the streams are concatenated, run through a merged trunk, and
    reduced to a single output. Compiled with RMSprop and MAE loss.
    """
    n_views = len(view_n)
    in_shape = (sz_input, sz_input2, n_views)

    # One input stack per angular direction.
    input_stack_90d = Input(shape=in_shape, name='input_stack_90d')
    input_stack_0d = Input(shape=in_shape, name='input_stack_0d')
    input_stack_45d = Input(shape=in_shape, name='input_stack_45d')
    input_stack_M45d = Input(shape=in_shape, name='input_stack_M45d')

    # 4-stream layer: Conv - Relu - Conv - BN - Relu, one stream per input.
    streams = [
        layer1_multistream(sz_input, sz_input2, n_views, int(filt_num))(stack)
        for stack in (input_stack_90d, input_stack_0d,
                      input_stack_45d, input_stack_M45d)
    ]

    # Merge the four streams along channels.
    mid_merged = concatenate(streams, name='mid_merged')

    # Merged layer: Conv - Relu - Conv - BN - Relu.
    trunk = layer2_merged(sz_input - 6, sz_input2 - 6,
                          int(4 * filt_num), int(4 * filt_num),
                          conv_depth)(mid_merged)

    # Last conv layer: Conv - Relu - Conv.
    output = layer3_last(sz_input - 18, sz_input2 - 18,
                         int(4 * filt_num), int(4 * filt_num))(trunk)

    model_512 = Model(inputs=[input_stack_90d, input_stack_0d,
                              input_stack_45d, input_stack_M45d],
                      outputs=[output])
    model_512.compile(optimizer=RMSprop(lr=learning_rate), loss='mae')
    model_512.summary()

    return model_512
Exemplo n.º 2
0
    def build_model(self):
        """Create actor and critic networks over a shared conv + GRU trunk."""
        inp = Input(shape=self.state_size)

        # Time-distributed conv encoder applied to every frame of the state.
        x = inp
        conv_specs = [
            (32, (8, 8), (4, 4)),
            (32, (4, 4), (2, 2)),
            (32, (3, 3), (1, 1)),
            (8, (1, 1), (1, 1)),
        ]
        for filters, kernel, strides in conv_specs:
            x = TimeDistributed(
                Conv2D(filters, kernel, strides=strides, activation='elu'))(x)
        x = TimeDistributed(Flatten())(x)
        x = BatchNormalization()(x)

        # Recurrent layer summarizes the frame sequence into one vector.
        lstm = GRU(256, activation='tanh')(x)

        # Separate heads: policy distribution (actor) and state value (critic).
        policy = Dense(self.action_size, activation='softmax')(lstm)
        value = Dense(1, activation='linear')(lstm)

        actor = Model(inputs=inp, outputs=policy)
        critic = Model(inputs=inp, outputs=value)

        # NOTE(review): pre-builds the predict functions — presumably so
        # prediction can happen from another thread (TF1/Keras pattern); confirm.
        actor._make_predict_function()
        critic._make_predict_function()

        actor.summary()
        critic.summary()

        return actor, critic
Exemplo n.º 3
0
    def __init__(self):
        """Assemble a DenseNet-style classifier over 224x224x3 input."""
        self.growth_rate = 32
        self.num_dense_block = 4
        inputs = Input(shape=(224, 224, 3))

        # Stem: 7x7 stride-2 conv -> BN -> ReLU -> 3x3 stride-2 max-pool.
        x = Conv2D(64, (7, 7), (2, 2), padding="SAME")(inputs)
        x = BatchNormalization()(x)
        x = Activation("relu")(x)
        x = MaxPool2D((3, 3), (2, 2), padding="VALID")(x)

        channels = 64
        layers_per_block = [6, 12, 24, 16]
        for block_idx, num_layers in enumerate(layers_per_block):
            x, channels = dense_block(x,
                                      num_layers,
                                      channels,
                                      growth_rate=self.growth_rate)
            # Transition (compression factor 1.0) between all but the last block.
            if block_idx != self.num_dense_block - 1:
                x = transition_block(x, channels, 1.0)
                channels = channels * 1.0

        # Final BN/ReLU after the last dense block (x is still conv features).
        x = BatchNormalization()(x)
        x = Activation("relu")(x)

        # Classification head.
        x = GlobalAveragePooling2D()(x)
        x = Dense(1000, activation="softmax")(x)
        model = Model(inputs=inputs, outputs=x)
        model.summary()
        self.model = model
Exemplo n.º 4
0
    def inference(self, mode='simple'):
        """Build and compile a FlowNet model.

        Args:
            mode: 'simple' concatenates the two views channel-wise and feeds
                them to FlowNetSimple; 'correlation' feeds both views to
                FlowNetCorr separately.

        Returns:
            The compiled Keras Model (Adam optimizer, MAE loss).

        Raises:
            ValueError: if `mode` is neither 'simple' nor 'correlation'
                (previously an unknown mode silently returned None).
        """
        left_image = Input(shape=(self.model_in_height, self.model_in_width,
                                  self.model_in_depth),
                           name='left_input')
        right_image = Input(shape=(self.model_in_height, self.model_in_width,
                                   self.model_in_depth),
                            name='right_image')

        if mode == 'simple':
            # Stack the two views along the channel axis before the encoder.
            concate_view = concatenate([left_image, right_image],
                                       axis=3,
                                       name='concate_view')
            prediction = self.FlowNetSimple(concate_view)
        elif mode == 'correlation':
            prediction = self.FlowNetCorr(left_image, right_image)
        else:
            raise ValueError(
                "mode must be 'simple' or 'correlation', got %r" % (mode,))

        # Shared assembly/compilation — previously duplicated in both branches.
        FlowNet = Model(inputs=[left_image, right_image],
                        outputs=[prediction])
        opt = Adam(lr=self.learning_rate)
        FlowNet.compile(optimizer=opt, loss='mae')
        FlowNet.summary()

        return FlowNet
Exemplo n.º 5
0
def get_model(filters_count, conv_depth, learning_rate, input_shape=(512, 512, 9)):
    """Build and compile the four-stream EPI model.

    Default input shape 512x512x9: nine grayscale views stacked on channels.
    """
    filters = int(filters_count)

    # One input per angular direction.
    input_90d = Input(shape=input_shape, name='input_90d')
    input_0d = Input(shape=input_shape, name='input_0d')
    input_45d = Input(shape=input_shape, name='input_45d')
    input_m45d = Input(shape=input_shape, name='input_m45d')

    # Part 1: independent per-direction feature streams.
    streams = [layersP1_multistream(input_shape, filters)(direction)
               for direction in (input_90d, input_0d, input_45d, input_m45d)]

    # Fuse the four streams along channels.
    merged = concatenate(streams, name='merged')

    # Part 2: conv-relu-bn-conv-relu over the fused features.
    fused_shape = (input_shape[0], input_shape[1], filters * 4)
    merged = layersP2_merged(input_shape=fused_shape,
                             filters_count=filters * 4,
                             conv_depth=conv_depth)(merged)

    # Part 3: output head.
    output = layersP3_output(input_shape=fused_shape,
                             filters_count=filters * 4)(merged)

    mymodel = Model(inputs=[input_90d, input_0d, input_45d, input_m45d],
                    outputs=[output])
    mymodel.compile(optimizer=RMSprop(lr=learning_rate), loss='mae')
    mymodel.summary()

    return mymodel
Exemplo n.º 6
0
def define_AttMLFNet(sz_input, sz_input2, view_n, learning_rate):
    """Build the attention-based multi-branch light-field network.

    Four angular branches (horizontal, vertical, 45°, 135°), each over
    ``len(view_n)`` single-channel views: per-view feature extraction, one
    cost volume per branch, intra-branch 3D aggregation, inter-branch
    attention, cost regression, and soft-argmin disparity regression.

    Returns the compiled Keras Model (Adam optimizer, MAE loss).
    """
    n_views = len(view_n)
    """ 4 branches inputs"""
    input_list = [Input(shape=(sz_input, sz_input2, 1))
                  for _ in range(n_views * 4)]
    """ 4 branches features"""
    # One shared extractor applied to every view.
    feature_extraction_layer = feature_extraction(sz_input, sz_input2)
    feature_list = [feature_extraction_layer(view) for view in input_list]
    # Split per branch. Previously hard-coded to 9 views per branch; derive
    # from len(view_n) instead so other angular resolutions work too
    # (identical behavior for the original 9-view configuration).
    feature_h_list = feature_list[0:n_views]
    feature_v_list = feature_list[n_views:2 * n_views]
    feature_45_list = feature_list[2 * n_views:3 * n_views]
    feature_135_list = feature_list[3 * n_views:]
    """ cost volume """
    cv_h = Lambda(_get_h_CostVolume_)(feature_h_list)
    cv_v = Lambda(_get_v_CostVolume_)(feature_v_list)
    cv_45 = Lambda(_get_45_CostVolume_)(feature_45_list)
    cv_135 = Lambda(_get_135_CostVolume_)(feature_135_list)
    """ intra branch """
    cv_h_3d, cv_h_ca = to_3d_h(cv_h)
    cv_v_3d, cv_v_ca = to_3d_v(cv_v)
    cv_45_3d, cv_45_ca = to_3d_45(cv_45)
    cv_135_3d, cv_135_ca = to_3d_135(cv_135)
    """ inter branch """
    cv, attention_4 = branch_attention(
        multiply([cv_h_3d, cv_v_3d, cv_45_3d, cv_135_3d]), cv_h_ca, cv_v_ca,
        cv_45_ca, cv_135_ca)
    """ cost volume regression """
    cost = basic(cv)

    # Drop the trailing singleton channel and move disparity to the last axis.
    cost = Lambda(lambda x: K.permute_dimensions(K.squeeze(x, -1),
                                                 (0, 2, 3, 1)))(cost)
    pred = Activation('softmax')(cost)
    pred = Lambda(disparityregression)(pred)

    model = Model(inputs=input_list, outputs=[pred])

    model.summary()

    opt = Adam(lr=learning_rate)

    model.compile(optimizer=opt, loss='mae')

    return model
Exemplo n.º 7
0
    def inference(self):
        """Assemble and compile the 4-stream EPINET (90d/0d/45d/M45d inputs)."""
        h, w = self.img_height, self.img_width
        n_views = len(self.view_n)
        stream_filters = int(self.filt_num)
        trunk_filters = int(4 * self.filt_num)

        # 4-Input: Conv - Relu - Conv - BN - Relu. One stack per direction.
        stack_names = ('input_stack_90d', 'input_stack_0d',
                       'input_stack_45d', 'input_stack_M45d')
        input_stacks = [Input(shape=(h, w, n_views), name=stack_name)
                        for stack_name in stack_names]

        # 4-Stream layer: Conv - Relu - Conv - BN - Relu.
        streams = [self.layer1_multistream(h, w, n_views,
                                           stream_filters)(stack)
                   for stack in input_stacks]

        # Merge the four streams along channels.
        mid_merged = concatenate(streams, name='mid_merged')

        # Merged layer: Conv - Relu - Conv - BN - Relu.
        trunk = self.layer2_merged(h - 6, w - 6,
                                   trunk_filters, trunk_filters,
                                   self.conv_depth)(mid_merged)

        # Last conv layer: Conv - Relu - Conv.
        output = self.layer3_last(h - 18, w - 18,
                                  trunk_filters, trunk_filters)(trunk)

        epinet = Model(inputs=input_stacks, outputs=[output])
        epinet.compile(optimizer=RMSprop(lr=self.learning_rate), loss='mae')
        epinet.summary()

        return epinet
Exemplo n.º 8
0
def define_cepinet(sz_input,sz_input2,view_n,conv_depth,filt_num,learning_rate,for_vis = False):
    """Build and compile a compact 3-stream EPINET (90d, 0d, M45d; 45d disabled).

    Parameters
    ----------
    sz_input, sz_input2 : patch height / width.
    view_n : sequence of views; its length sets the input channel count.
    conv_depth : depth parameter forwarded to the merged trunk.
    filt_num : base filter count per stream.
    learning_rate : RMSprop learning rate.
    for_vis : when True, intermediate feature taps are also wired as outputs.

    Returns
    -------
    (model, feat_names).  NOTE(review): ``feat_names`` is not defined in this
    function — presumably a module-level global set alongside ``feats``;
    confirm it exists or this raises NameError at return.
    """
    global feats
    # Reset the module-level feature collector. layer1_multistream(do_vis=True)
    # presumably appends feature-tap layers to it — TODO confirm.
    if for_vis:
        feats = []
    else:
        feats = None
    ''' 4-Input : Conv - Relu - Conv - BN - Relu ''' 
    input_stack_90d = Input(shape=(sz_input,sz_input2,len(view_n)), name='input_stack_90d')
    input_stack_0d= Input(shape=(sz_input,sz_input2,len(view_n)), name='input_stack_0d')
#    input_stack_45d= Input(shape=(sz_input,sz_input2,len(view_n)), name='input_stack_45d')
    input_stack_M45d= Input(shape=(sz_input,sz_input2,len(view_n)), name='input_stack_M45d')
    # Only three of the four angular streams are active (45d commented out),
    # so the merged channel count scales by 3 instead of 4.
    num_stacks = 3
    with tf.variable_scope("4-Stream"):
        ''' 4-Stream layer : Conv - Relu - Conv - BN - Relu ''' 
        mid_90d = layer1_multistream(sz_input,sz_input2,len(view_n),int(filt_num),do_vis=True,name="90d")(input_stack_90d)
        mid_0d = layer1_multistream(sz_input,sz_input2,len(view_n),int(filt_num),do_vis=True,name="0d")(input_stack_0d)    
    #    mid_45d=layer1_multistream(sz_input,sz_input2,len(view_n),int(filt_num))(input_stack_45d)    
        mid_M45d = layer1_multistream(sz_input,sz_input2,len(view_n),int(filt_num),do_vis=True,name="M45d")(input_stack_M45d)   

    with tf.variable_scope("Merge"):
        ''' Merge layers ''' 
        mid_merged = concatenate([mid_90d,mid_0d,mid_M45d],  name='mid_merged')
        
        ''' Merged layer : Conv - Relu - Conv - BN - Relu '''
        mid_merged_=layer2_merged(sz_input-6,sz_input2-6,int(num_stacks*filt_num),int(num_stacks*filt_num),conv_depth)(mid_merged)

    with tf.variable_scope("Last"):
        ''' Last Conv layer : Conv - Relu - Conv '''
        output=layer3_last(sz_input-18,sz_input2-18,int(num_stacks*filt_num),int(num_stacks*filt_num))(mid_merged_)

    if for_vis:
        # Six feature taps per active stream (18 total), applied to the raw
        # input stacks, plus the final output. Slice bounds assume the three
        # layer1_multistream calls above each registered exactly six taps,
        # in call order — TODO confirm against layer1_multistream.
        feat_outs90d = [feat(input_stack_90d) for feat in feats[0:6]]
        feat_outs0d =  [feat(input_stack_0d) for feat in feats[6:12]]
        feat_outsM45d = [feat(input_stack_M45d) for feat in feats[12:18]]
        outputs = feat_outs90d + feat_outs0d + feat_outsM45d + [output]

    else: 
        outputs = [output]
    model_512 = Model(inputs = [input_stack_90d,input_stack_0d,
#                               input_stack_45d,
                               input_stack_M45d], outputs = outputs)
    opt = RMSprop(lr=learning_rate)
    model_512.compile(optimizer=opt, loss='mae')
    model_512.summary() 
    
    return model_512, feat_names
Exemplo n.º 9
0
def define_LFattNet(sz_input, sz_input2, view_n, learning_rate):
    """Build LFattNet over all n*n light-field views (81 for n=9).

    Evaluation variant: the model outputs both the disparity prediction and
    the channel-attention map.
    """
    total_views = len(view_n) * len(view_n)

    # One single-channel input per view.
    input_list = []
    for idx in range(total_views):
        print('input ' + str(idx))
        input_list.append(Input(shape=(sz_input, sz_input2, 1)))

    # Shared feature extractor applied to every view.
    feature_extraction_layer = feature_extraction(sz_input, sz_input2)
    feature_list = []
    for idx in range(total_views):
        print('feature ' + str(idx))
        feature_list.append(feature_extraction_layer(input_list[idx]))

    # Cost volume followed by channel attention.
    cv = Lambda(_getCostVolume_)(feature_list)
    cv, attention = channel_attention(cv)

    # Cost volume regression; squeeze trailing channel, disparity last.
    cost = basic(cv)
    cost = Lambda(lambda x: K.permute_dimensions(K.squeeze(x, -1),
                                                 (0, 2, 3, 1)))(cost)
    pred = Activation('softmax')(cost)
    pred = Lambda(disparityregression)(pred)

    # when training use below
    # model = Model(inputs=input_list, outputs=[pred])

    # when evaluation use below
    model = Model(inputs=input_list, outputs=[pred, attention])
    model.summary()

    model.compile(optimizer=Adam(lr=learning_rate), loss='mae')

    return model
Exemplo n.º 10
0
def define_epinet(sz_input, sz_input2, view_n, conv_depth, filt_num,
                  learning_rate):
    """Build EPINET with visualization outputs drawn from the global `feats`.

    Returns (model, feat_names); both `feats` and `feat_names` are
    module-level globals populated elsewhere.
    """
    global feats
    h, w = sz_input, sz_input2
    n_views = len(view_n)
    trunk_filters = int(4 * filt_num)

    # One input stack per angular direction.
    input_stack_90d = Input(shape=(h, w, n_views), name='input_stack_90d')
    input_stack_0d = Input(shape=(h, w, n_views), name='input_stack_0d')
    input_stack_45d = Input(shape=(h, w, n_views), name='input_stack_45d')
    input_stack_M45d = Input(shape=(h, w, n_views), name='input_stack_M45d')

    # 4-stream layer (do_vis=True registers feature taps in `feats`).
    mid_90d = layer1_multistream(h, w, n_views, int(filt_num),
                                 do_vis=True, name="90d")(input_stack_90d)
    mid_0d = layer1_multistream(h, w, n_views, int(filt_num),
                                do_vis=True, name="0d")(input_stack_0d)
    mid_45d = layer1_multistream(h, w, n_views, int(filt_num),
                                 do_vis=True, name="45d")(input_stack_45d)
    mid_M45d = layer1_multistream(h, w, n_views, int(filt_num),
                                  do_vis=True, name="M45d")(input_stack_M45d)

    # Merge the four streams along channels.
    mid_merged = concatenate([mid_90d, mid_0d, mid_45d, mid_M45d],
                             name='mid_merged')

    # Merged trunk: Conv - Relu - Conv - BN - Relu.
    trunk = layer2_merged(h - 6, w - 6, trunk_filters, trunk_filters,
                          conv_depth)(mid_merged)

    # Last conv layer: Conv - Relu - Conv.
    output = layer3_last(h - 18, w - 18, trunk_filters,
                         trunk_filters)(trunk)

    # First registered feature tap of each stream, plus the final output.
    feat_outs90d = [feat(input_stack_90d) for feat in feats[0:1]]
    feat_outs0d = [feat(input_stack_0d) for feat in feats[1:2]]
    feat_outs45d = [feat(input_stack_45d) for feat in feats[2:3]]
    feat_outsM45d = [feat(input_stack_M45d) for feat in feats[3:4]]
    outputs = (feat_outs90d + feat_outs0d + feat_outs45d + feat_outsM45d
               + [output])

    model_512 = Model(inputs=[input_stack_90d, input_stack_0d,
                              input_stack_45d, input_stack_M45d],
                      outputs=outputs)
    model_512.compile(optimizer=RMSprop(lr=learning_rate), loss='mae')
    model_512.summary()

    return model_512, feat_names
Exemplo n.º 11
0
def main():
    """Train a WGAN-GP: alternate discriminator and generator updates,
    periodically dumping a grid of generated samples to generate/<epoch>.png.
    """
    batch_size = _BATCH_SIZE
    noise_dim = _NOISE_DIM
    lamb = 10.0  # gradient-penalty weight

    train = get_data()
    train_images, train_labels = make_batch(train)

    gen = generator()
    dis = discriminator()
    gen.summary()
    dis.summary()

    dis_opt = Adam(lr=1.0e-4, beta_1=0.0, beta_2=0.9)
    gen_opt = Adam(lr=1.0e-4, beta_1=0.0, beta_2=0.9)

    # Generator training graph: discriminator frozen.
    gen.trainable = True
    dis.trainable = False
    gen_inputs = Input(shape=(noise_dim, ))
    gen_outputs = gen(gen_inputs)
    dis_outputs = dis(gen_outputs)
    gen_model = Model(inputs=[gen_inputs], outputs=[dis_outputs])
    gen_model.compile(loss=wasserstein_loss, optimizer=gen_opt)
    gen_model.summary()

    # Discriminator training graph: generator frozen; three heads —
    # real score, fake score, and the interpolate score used by the
    # gradient-penalty regularizer.
    gen.trainable = False
    dis.trainable = True
    real_inputs = Input(shape=train_images.shape[1:])
    dis_real_outputs = dis(real_inputs)

    fake_inputs = Input(shape=(noise_dim, ))
    gen_fake_outputs = gen(fake_inputs)
    dis_fake_outputs = dis(gen_fake_outputs)

    interpolate = RandomWeightedAverage()([real_inputs, gen_fake_outputs])
    dis_interpolate_outputs = dis(interpolate)

    gp_reg = partial(gradient_penalty, interpolate=interpolate, lamb=lamb)
    #gp_reg.__name__ = 'gradient_penalty'

    dis_model = Model(inputs=[real_inputs, fake_inputs],
                      outputs=[dis_real_outputs, dis_fake_outputs,
                               dis_interpolate_outputs])
    dis_model.compile(loss=[wasserstein_loss, wasserstein_loss, gp_reg],
                      optimizer=dis_opt)
    dis_model.summary()

    max_epoch = 10001
    max_train_only_dis = 5  # discriminator updates per generator update
    minibatch_size = batch_size * max_train_only_dis
    max_loop = int(train_images.shape[0] / minibatch_size)

    progbar = Progbar(target=max_epoch)
    # WGAN labels: -1 real, +1 fake, 0 dummy target for the penalty head.
    real_label = [-1] * batch_size
    fake_label = [1] * batch_size
    dummy_label = [0] * batch_size
    for epoch in range(max_epoch):

        np.random.shuffle(train_images)
        for loop in range(max_loop):

            minibatch_train_images = train_images[
                loop * minibatch_size:(loop + 1) * minibatch_size]
            for train_only_dis in range(max_train_only_dis):

                real = minibatch_train_images[
                    train_only_dis * batch_size:(train_only_dis + 1) *
                    batch_size]
                noise = np.random.uniform(
                    -1, 1, (batch_size, noise_dim)).astype(np.float32)
                dis_loss = dis_model.train_on_batch(
                    [real, noise], [real_label, fake_label, dummy_label])

            noise = np.random.uniform(-1, 1, (batch_size, noise_dim)).astype(
                np.float32)
            gen_loss = gen_model.train_on_batch(noise, real_label)

        progbar.add(1,
                    values=[("dis_loss", dis_loss[0]), ("gen_loss", gen_loss)])
        if epoch % 100 == 0:
            # BUG FIX: sample width was hard-coded to 10; use noise_dim so the
            # sample matches the generator's input dimension whenever
            # _NOISE_DIM != 10.
            noise = np.random.uniform(
                -1, 1, (batch_size, noise_dim)).astype(np.float32)
            fake = gen.predict(noise)
            # Tile samples side by side and rescale [-1, 1] -> [0, 255].
            tmp = [r.reshape(-1, 32) for r in fake]
            tmp = np.concatenate(tmp, axis=1)
            img = ((tmp / 2.0 + 0.5) * 255.0).astype(np.uint8)
            Image.fromarray(img).save("generate/%d.png" % (epoch))

    backend.clear_session()
Exemplo n.º 12
0
def train(data,
          file_name,
          nlayer,
          num_epochs=10,
          batch_size=128,
          train_temp=1,
          init=None,
          activation=tf.nn.relu):
    """
    Train an n-layer residual CNN for MNIST and CIFAR.

    Args:
        data: object exposing train_data/train_labels and
            validation_data/validation_labels arrays.
        file_name: path to save the trained model, or None to skip saving.
        nlayer: residual depth, one of 2..5.
        num_epochs / batch_size: standard training hyper-parameters.
        train_temp: softmax temperature applied to the logits in the loss.
        init: optional path of initial weights to load.
        activation: activation callable used inside the residual blocks.

    Returns:
        {'model': trained Model, 'history': Keras History}.

    Raises:
        ValueError: if nlayer is unsupported (previously this surfaced
        later as a NameError when `x` was never assigned).
    """
    # Residual stack layout per depth. Residual2 changes the width, Residual
    # keeps it; this table reproduces the original copy-pasted branches.
    stacks = {
        2: [(Residual2, 8), (Residual2, 16)],
        3: [(Residual2, 8), (Residual, 8), (Residual2, 16)],
        4: [(Residual2, 8), (Residual, 8), (Residual2, 16), (Residual, 16)],
        5: [(Residual2, 8), (Residual, 8), (Residual, 8),
            (Residual2, 16), (Residual, 16)],
    }
    if nlayer not in stacks:
        raise ValueError("nlayer must be one of %s, got %r"
                         % (sorted(stacks), nlayer))

    inputs = Input(shape=(28, 28, 1))
    x = inputs
    for block, width in stacks[nlayer]:
        x = block(width, activation)(x)
        x = Lambda(activation)(x)
    x = AveragePooling2D(pool_size=7)(x)
    x = Flatten()(x)
    x = Dense(10)(x)

    model = Model(inputs=inputs, outputs=x)

    # load initial weights when given
    if init is not None:
        model.load_weights(init)

    # loss: cross entropy between true labels and temperature-scaled logits
    def fn(correct, predicted):
        return tf.nn.softmax_cross_entropy_with_logits(labels=correct,
                                                       logits=predicted /
                                                       train_temp)

    # initiate the Adam optimizer
    sgd = Adam()

    # compile the Keras model, given the specified loss and optimizer
    model.compile(loss=fn, optimizer=sgd, metrics=['accuracy'])

    model.summary()
    # run training with given dataset, and print progress
    history = model.fit(data.train_data,
                        data.train_labels,
                        batch_size=batch_size,
                        validation_data=(data.validation_data,
                                         data.validation_labels),
                        epochs=num_epochs,
                        shuffle=True)

    # save model to a file
    if file_name is not None:
        model.save(file_name)

    return {'model': model, 'history': history}