Example #1
def to_3d_135(cost_volume_135):
    # channel attention over the 135-degree cost volume, then 3D conv aggregation
    feature = 4 * 9  # 4 feature channels per view x 9 views on the diagonal
    channel_135 = GlobalAveragePooling3D(
        data_format='channels_last')(cost_volume_135)
    channel_135 = Lambda(lambda y: K.expand_dims(
        K.expand_dims(K.expand_dims(y, 1), 1), 1))(channel_135)
    channel_135 = Conv3D(feature // 2,
                         1,
                         1,
                         'same',
                         data_format='channels_last')(channel_135)
    channel_135 = Activation('relu')(channel_135)
    channel_135 = Conv3D(3, 1, 1, 'same',
                         data_format='channels_last')(channel_135)
    channel_135 = Activation('sigmoid')(channel_135)
    channel_135 = Lambda(lambda y: K.concatenate([
        y[:, :, :, :, 0:1], y[:, :, :, :, 0:1], y[:, :, :, :, 0:1],
        y[:, :, :, :, 0:1], y[:, :, :, :, 1:2], y[:, :, :, :, 2:3],
        y[:, :, :, :, 2:3], y[:, :, :, :, 2:3], y[:, :, :, :, 2:3]
    ],
                                                 axis=-1))(channel_135)
    channel_135 = Lambda(lambda y: K.reshape(y, (K.shape(y)[0], 1, 1, 1, 9)))(
        channel_135)
    channel_135 = Lambda(lambda y: K.repeat_elements(y, 4, -1))(channel_135)
    cv_135_tmp = multiply([channel_135, cost_volume_135])
    cv_135_tmp = Conv3D(feature // 2, 1, 1, 'same',
                        data_format='channels_last')(cv_135_tmp)
    cv_135_tmp = Activation('relu')(cv_135_tmp)
    cv_135_tmp = Conv3D(3, 1, 1, 'same',
                        data_format='channels_last')(cv_135_tmp)
    cv_135_tmp = Activation('sigmoid')(cv_135_tmp)
    attention_135 = Lambda(lambda y: K.concatenate([
        y[:, :, :, :, 0:1], y[:, :, :, :, 0:1], y[:, :, :, :, 0:1],
        y[:, :, :, :, 0:1], y[:, :, :, :, 1:2], y[:, :, :, :, 2:3],
        y[:, :, :, :, 2:3], y[:, :, :, :, 2:3], y[:, :, :, :, 2:3]
    ],
                                                   axis=-1))(cv_135_tmp)
    attention_135 = Lambda(lambda y: K.repeat_elements(y, 4, -1))(
        attention_135)
    cv_135_multi = multiply([attention_135, cost_volume_135])
    dres3 = convbn_3d(cv_135_multi, feature, 3, 1)
    dres3 = Activation('relu')(dres3)
    dres3 = convbn_3d(dres3, feature // 2, 3, 1)
    dres3 = Activation('relu')(dres3)
    dres3 = convbn_3d(dres3, feature // 2, 3, 1)
    dres3 = Activation('relu')(dres3)
    dres3 = convbn_3d(dres3, feature // 4, 3, 1)
    dres3 = Activation('relu')(dres3)
    dres3 = convbn_3d(dres3, 1, 3, 1)
    cost3 = Activation('relu')(dres3)
    cost3 = Lambda(lambda x: K.permute_dimensions(K.squeeze(x, -1),
                                                  (0, 2, 3, 1)))(cost3)
    return cost3, cv_135_multi
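The two Lambda concatenations above fan 3 attention weights out to the 9 views on the 135-degree line: the first weight covers the four outer views on one side, the second the centre view, the third the four views on the other side. A standalone NumPy sketch of the same mapping (toy weights, not part of the model):

import numpy as np

# same index mapping as the Lambda concatenation above
idx = [0, 0, 0, 0, 1, 2, 2, 2, 2]

w = np.array([0.2, 0.9, 0.4])         # toy per-group attention weights
per_view = w[idx]                     # one weight per view -> 9 values
per_channel = np.repeat(per_view, 4)  # 4 feature channels per view -> 36 values,
                                      # matching K.repeat_elements(y, 4, -1)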
Example #2
def _get_135_CostVolume_(inputs):
    # build a cost volume from the 9 views on the 135-degree diagonal by
    # shifting each view according to its angular offset and the disparity d
    shape = K.shape(inputs[0])
    disparity_costs = []
    for d in range(-4, 5):
        if d == 0:
            tmp_list = list(inputs)  # no shift at zero disparity
        else:
            tmp_list = []
            for i in range(len(inputs)):
                (v, u) = divmod(i, 9)
                v = v + i  # the 135-degree line has one view per row: v == i
                u = 8 - u
                # shift view i by its angular distance from the centre view
                # (4, 4), scaled by the disparity d (tf.contrib is TF 1.x)
                tensor = tf.contrib.image.translate(inputs[i],
                                                    [d * (u - 4), d * (v - 4)],
                                                    'BILINEAR')
                tmp_list.append(tensor)

        cost = K.concatenate(tmp_list, axis=3)
        disparity_costs.append(cost)
    cost_volume = K.stack(disparity_costs, axis=1)
    cost_volume = K.reshape(cost_volume,
                            (shape[0], 9, shape[1], shape[2], 4 * 9))
    return cost_volume
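tf.contrib was removed in TensorFlow 2, so the translation above only runs on TF 1.x. A minimal sketch of the same per-view shift using tensorflow_addons (the tfa dependency and the helper name are assumptions, not part of the original):

import tensorflow_addons as tfa

def shift_view(view, d, u, v):
    # view: [B, H, W, C]; (u, v): angular coordinates; d: disparity.
    # tfa.image.translate mirrors tf.contrib.image.translate.
    return tfa.image.translate(view,
                               [float(d * (u - 4)), float(d * (v - 4))],
                               interpolation='bilinear')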
Example #3
    def call(self, x, mask=None):

        assert (len(x) == 2)

        img = x[0]
        rois = x[1]

        input_shape = K.shape(img)

        outputs = []

        for roi_idx in range(self.num_rois):

            # each RoI is encoded as (x, y, w, h) on the feature map
            x = rois[0, roi_idx, 0]
            y = rois[0, roi_idx, 1]
            w = rois[0, roi_idx, 2]
            h = rois[0, roi_idx, 3]

            # computed but unused in the TensorFlow path below
            row_length = w / float(self.pool_size)
            col_length = h / float(self.pool_size)

            num_pool_regions = self.pool_size

            # NOTE: the RoiPooling implementation differs between Theano and
            # TensorFlow due to the lack of a resize op in Theano. The Theano
            # implementation is much less efficient and leads to long compile times.
            x = K.cast(x, 'int32')
            y = K.cast(y, 'int32')
            w = K.cast(w, 'int32')
            h = K.cast(h, 'int32')

            # tf.image.resize_images is the TF 1.x name (tf.image.resize in TF 2)
            rs = tf.image.resize_images(img[:, y:y + h, x:x + w, :],
                                        (self.pool_size, self.pool_size))
            outputs.append(rs)

        final_output = K.concatenate(outputs, axis=0)
        final_output = K.reshape(final_output,
                                 (1, self.num_rois, self.pool_size,
                                  self.pool_size, self.nb_channels))

        # (0, 1, 2, 3, 4) is the identity permutation, so this is a no-op
        final_output = K.permute_dimensions(final_output, (0, 1, 2, 3, 4))

        return final_output
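For context, a hedged usage sketch: the layer name RoiPooling, its constructor arguments, and all shapes below are assumptions based on the snippet, not taken from the source.

from keras.layers import Input

feature_map = Input(shape=(None, None, 512))  # conv features, e.g. a VGG block
rois = Input(shape=(32, 4))                   # 32 RoIs encoded as (x, y, w, h)

# pooled: (1, num_rois, pool_size, pool_size, nb_channels)
pooled = RoiPooling(pool_size=7, num_rois=32)([feature_map, rois])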
Example #4
def channel_attention(cost_volume):
    # squeeze-and-excitation style attention over the cost-volume channels
    x = GlobalAveragePooling3D()(cost_volume)
    x = Lambda(
        lambda y: K.expand_dims(K.expand_dims(K.expand_dims(y, 1), 1), 1))(x)
    x = Conv3D(170, 1, 1, 'same')(x)
    x = Activation('relu')(x)
    x = Conv3D(15, 1, 1, 'same')(x)  # [B, 1, 1, 1, 15]
    x = Activation('sigmoid')(x)

    # 15 -> 25
    # 0  1  2  3  4
    #    5  6  7  8
    #       9 10 11
    #         12 13
    #            14
    #
    # 0  1  2  3  4
    # 1  5  6  7  8
    # 2  6  9 10 11
    # 3  7 10 12 13
    # 4  8 11 13 14

    x = Lambda(lambda y: K.concatenate([
        y[:, :, :, :, 0:5], y[:, :, :, :, 1:2], y[:, :, :, :, 5:9],
        y[:, :, :, :, 2:3], y[:, :, :, :, 6:7], y[:, :, :, :, 9:12],
        y[:, :, :, :, 3:4], y[:, :, :, :, 7:8], y[:, :, :, :, 10:11],
        y[:, :, :, :, 12:14], y[:, :, :, :, 4:5], y[:, :, :, :, 8:9],
        y[:, :, :, :, 11:12], y[:, :, :, :, 13:15]
    ],
                                       axis=-1))(x)

    x = Lambda(lambda y: K.reshape(y, (K.shape(y)[0], 5, 5)))(x)
    # mirror the 5x5 attention map out to the full 9x9 view grid
    x = Lambda(lambda y: tf.pad(y, [[0, 0], [0, 4], [0, 4]], 'REFLECT'))(x)
    attention = Lambda(lambda y: K.reshape(y, (K.shape(y)[0], 1, 1, 1, 81)))(x)
    # 4 feature channels per view -> 324 weights matching the cost volume
    x = Lambda(lambda y: K.repeat_elements(y, 4, -1))(attention)
    return multiply([x, cost_volume]), attention
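The 15-to-25 expansion above is easier to check offline. A standalone NumPy sketch (not part of the model) that reproduces the symmetric 5x5 layout from the diagram in the comments:

import numpy as np

# flattened row-major indices of the symmetric 5x5 matrix built by the Lambda
idx = [0, 1, 2, 3, 4,
       1, 5, 6, 7, 8,
       2, 6, 9, 10, 11,
       3, 7, 10, 12, 13,
       4, 8, 11, 13, 14]

w = np.arange(15)              # stand-in for the 15 sigmoid outputs
grid = w[idx].reshape(5, 5)
assert (grid == grid.T).all()  # symmetric, as the diagram shows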
Example #5
# Subtract the VGG16 ImageNet channel means, then reorder RGB -> BGR as VGG expects
content_array[:, :, :, 0] -= 103.939
content_array[:, :, :, 1] -= 116.779
content_array[:, :, :, 2] -= 123.68
style_array[:, :, :, 0] -= 103.939
style_array[:, :, :, 1] -= 116.779
style_array[:, :, :, 2] -= 123.68
content_array = content_array[:, :, :, ::-1]
style_array = style_array[:, :, :, ::-1]

# Create the backend variables (TensorFlow in our case).
content_image = K.variable(content_array)
style_image = K.variable(style_array)
combination_image = K.placeholder((1, height, width, 3))

# Concatenate all three images into a single input tensor
input_tensor = K.concatenate([content_image,
                              style_image,
                              combination_image], axis=0)

# Load the VGG16 model from Keras. We are only interested in the features
# from the different layers, hence we omit the dense layers at the top.
model = applications.VGG16(input_tensor=input_tensor, weights='imagenet',
                           include_top=False)

# Store layers of the model. We'll need that to refer to the layers we want to
# use for the transfer.
layers = dict([(layer.name, layer.output) for layer in model.layers])
#pprint(layers)

# Define the total loss. We'll add to it in stages.
loss = K.variable(0.)
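To inspect the generated image later, the preprocessing has to be undone. A minimal sketch of the inverse transform (the helper name deprocess_array is ours, not from the source):

import numpy as np

def deprocess_array(x, height, width):
    # invert the steps above: BGR -> RGB, add the channel means back,
    # clip to valid 8-bit pixel values
    x = x.reshape((height, width, 3))
    x = x[:, :, ::-1]
    x[:, :, 0] += 103.939
    x[:, :, 1] += 116.779
    x[:, :, 2] += 123.68
    return np.clip(x, 0, 255).astype('uint8')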
Example #6
    def step(self, a, states):
        """Run one recurrent time step.

        :param a: ground-truth frame for this time step
        :param states: list holding r, c, e for each of the self.nb_layers
            layers; if self.extrap_start_time is not None, the last two
            entries are [frame_prediction, t + 1]
        :return: (output, updated states)
        """
        r_tm1 = states[:self.nb_layers]
        c_tm1 = states[self.nb_layers:2 * self.nb_layers]
        e_tm1 = states[2 * self.nb_layers:3 * self.nb_layers]

        if self.extrap_start_time is not None:
            t = states[-1]
            # if past self.extrap_start_time, the previous prediction will be treated as the actual
            a = K.switch(t >= self.t_extrap, states[-2], a)
        c = []
        r = []
        e = []
        for l in reversed(range(self.nb_layers)):  # top-down pass
            inputs = [r_tm1[l], e_tm1[l]]
            if l < self.nb_layers - 1:
                inputs.append(r_up)  # representation upsampled from the layer above
            inputs = K.concatenate(inputs, axis=self.channel_axis)
            # print l, inputs.shape
            i = self.conv_layers['i'][l].call(inputs)
            f = self.conv_layers['f'][l].call(inputs)
            o = self.conv_layers['o'][l].call(inputs)
            _c = f * c_tm1[l] + i * self.conv_layers['c'][l].call(inputs)
            _r = o * self.LSTM_activation(_c)
            c.insert(0, _c)
            r.insert(0, _r)

            if l > 0:
                r_up = self.upsample.call(_r)  # upsampling

        for l in range(self.nb_layers):
            ahat = self.conv_layers['ahat'][l].call(r[l])
            if l == 0:
                ahat = K.minimum(ahat, self.pixel_max)
                frame_prediction = ahat
                # threshold: binarize the predicted frame to 0.5 where it
                # meets self.threshold, 0 elsewhere
                where = K.greater_equal(frame_prediction,
                                        K.constant(self.threshold))
                frame_prediction = tf.where(
                    where,
                    0.5 * tf.ones_like(frame_prediction, dtype=tf.float32),
                    tf.zeros_like(frame_prediction, dtype=tf.float32))
            # compute errors
            e_up = ahat - a
            e_down = a - ahat

            # ROI loss
            if l == 0 and self.use_roi_loss:
                e_up = tf.add(e_up,
                              tf.multiply(e_up, a, name='multiply_up_err'),
                              name='add_up_err')
                e_down = tf.add(e_down,
                                tf.multiply(e_down,
                                            a,
                                            name='multiply_down_err'),
                                name='add_down_err')
            #

            e_up = self.error_activation(e_up)
            e_down = self.error_activation(e_down)

            e.append(K.concatenate((e_up, e_down), axis=self.channel_axis))

            if self.output_layer_num == l:
                if self.output_layer_type == 'A':
                    output = a
                elif self.output_layer_type == 'Ahat':
                    output = ahat
                elif self.output_layer_type == 'R':
                    output = r[l]
                elif self.output_layer_type == 'E':
                    output = e[l]

            if l < self.nb_layers - 1:
                a = self.conv_layers['a'][l].call(e[l])
                a = self.pool.call(a)  # target for next layer (downsampling)

        if self.output_layer_type is None:
            if self.output_mode == 'prediction':
                output = frame_prediction

            else:
                for l in range(self.nb_layers):
                    layer_error = K.mean(K.batch_flatten(e[l]),
                                         axis=-1,
                                         keepdims=True)
                    # accumulate the per-layer errors into a single tensor
                    all_error = layer_error if l == 0 else K.concatenate(
                        (all_error, layer_error), axis=-1)
                    # print l, e[l].shape, layer_error.shape, all_error.shape
                if self.output_mode == 'error':
                    output = all_error
                else:
                    output = K.concatenate(
                        (K.batch_flatten(frame_prediction), all_error),
                        axis=-1)
                # print output.shape

        states = r + c + e
        if self.extrap_start_time is not None:
            ###
            '''
            sess = tf.get_default_session()
            comparison = tf.greater_equal(frame_prediction, tf.constant(0.3))
            sess.run(comparison)
            conditional_op = tf.assign(frame_prediction, tf.where(comparison, 0.5 * tf.ones_like(frame_prediction), tf.zeros_like(frame_prediction)))
            sess.run(conditional_op)
            '''
            ###
            states += [frame_prediction, t + 1]
        return output, states
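The error units follow PredNet's split rectification: the prediction error is rectified separately in each direction and the two halves are stacked along the channel axis. A standalone NumPy sketch, assuming error_activation is ReLU as in the original PredNet:

import numpy as np

def split_rectified_error(ahat, a):
    e_up = np.maximum(ahat - a, 0.0)    # over-prediction
    e_down = np.maximum(a - ahat, 0.0)  # under-prediction
    return np.concatenate((e_up, e_down), axis=-1)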
Example #7
def main(_):
    width, height = preprocessing.image.load_img(FLAGS.target_img_path).size
    gen_img_height = 400
    gen_img_width = int(width * gen_img_height / height)

    target_x = preprocess_img(FLAGS.target_img_path, target_size=(gen_img_height, gen_img_width))
    target_img = K.constant(target_x)
    style_x = preprocess_img(FLAGS.style_img_path, target_size=(gen_img_height, gen_img_width))
    style_img = K.constant(style_x)
    combination_img = K.placeholder(shape=(1, gen_img_height, gen_img_width, 3))

    input_tensor = K.concatenate([
        target_img,
        style_img,
        combination_img
    ], axis=0)

    model = applications.vgg19.VGG19(input_tensor=input_tensor,
                                     weights='imagenet',
                                     include_top=False)
    model.summary()

    outputs_dict = dict([(layer.name, layer.output) for layer in model.layers])
    content_layer = 'block5_conv2'
    style_layers = [
        'block1_conv1',
        'block2_conv1',
        'block3_conv1',
        'block4_conv1',
        'block5_conv1'
    ]

    total_variation_weight = 1e-4
    style_weight = 1.0
    content_weight = 0.025

    loss = K.variable(0.)
    layer_features = outputs_dict[content_layer]
    target_img_features = layer_features[0, :, :, :]
    combination_features = layer_features[2, :, :, :]
    loss += content_weight * content_loss(target_img_features, combination_features)

    for layer_name in style_layers:
        layer_features = outputs_dict[layer_name]
        style_features = layer_features[1, :, :, :]
        combination_features = layer_features[2, :, :, :]
        sl = style_loss(style_features, combination_features,
                        target_size=(gen_img_height, gen_img_width))
        loss += (style_weight / len(style_layers)) * sl

    loss += total_variation_weight * total_variation_loss(combination_img, target_size=(gen_img_height, gen_img_width))

    # set up the loss and gradient computation used by L-BFGS below
    grads_list = K.gradients(loss, combination_img)
    grads = grads_list[0]

    fetch_loss_and_grads = K.function(inputs=[combination_img],
                                      outputs=[loss, grads])

    lossAndGradsCache = LossAndGradsCache(fetch_loss_and_grads,
                                          target_size=(gen_img_height, gen_img_width))

    x = preprocess_img(FLAGS.target_img_path, target_size=(gen_img_height, gen_img_width))
    x = x.flatten()

    for i in range(FLAGS.iterations):
        start_time = time.time()

        x, min_val, info = fmin_l_bfgs_b(lossAndGradsCache.loss,
                                         x,
                                         fprime=lossAndGradsCache.grads,
                                         maxfun=20)
        print('@{:4d}: {:.4f}'.format(i + 1, min_val))

        x_copy = x.copy().reshape((gen_img_height, gen_img_width, 3))
        print(np.min(x_copy), np.mean(x_copy), np.max(x_copy))
        img = deprocess_img(x_copy)
        os.makedirs('out', exist_ok=True)
        filename = 'out/result_{:04d}.png'.format(i + 1)
        imsave(filename, img)  # scipy.misc.imsave (removed in SciPy >= 1.2)
        print('Iteration took {:.1f}s'.format(time.time() - start_time))
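LossAndGradsCache is referenced above but not defined in the snippet. A hedged sketch of what it likely does, in the style of the familiar Keras Evaluator pattern: fmin_l_bfgs_b wants separate loss and gradient callbacks, so the gradients computed alongside the loss are cached and served on the next call. The class body below is an assumption, not the author's code.

import numpy as np

class LossAndGradsCache:
    def __init__(self, fetch_loss_and_grads, target_size):
        self.fetch_loss_and_grads = fetch_loss_and_grads
        self.target_size = target_size
        self._grads = None

    def loss(self, x):
        # x arrives flattened from fmin_l_bfgs_b; restore the image shape
        h, w = self.target_size
        loss_value, grads_value = self.fetch_loss_and_grads(
            [x.reshape((1, h, w, 3))])
        self._grads = grads_value.flatten().astype('float64')
        return loss_value

    def grads(self, x):
        # return the gradients cached by the matching loss() call
        return np.copy(self._grads)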