def channel_attention_mirror(cost_volume):
    """Re-weight a cost volume with a mirrored 9x9 channel-attention map.

    The cost volume is globally average-pooled, passed through two 1x1x1
    convolutions (170 filters with ReLU, then 25 filters with sigmoid), and
    the 25 resulting weights are laid out as a 5x5 grid.  That grid is
    reflect-padded by four entries at the end of each axis to give a 9x9 map,
    which is flattened to 81 weights.  Each weight is repeated four times
    along the last axis before being multiplied with the cost volume.

    Args:
        cost_volume: 5-D Keras tensor; presumably its last axis has
            81 * 4 channels so that it lines up with the repeated weights
            (TODO confirm against the caller).

    Returns:
        A tuple ``(weighted_cost_volume, attention)`` where ``attention`` has
        shape ``(batch, 1, 1, 1, 81)``.
    """

    def _add_unit_axes(t):
        # (batch, C) -> (batch, 1, 1, 1, C)
        return K.expand_dims(K.expand_dims(K.expand_dims(t, 1), 1), 1)

    def _as_5x5(t):
        return K.reshape(t, (K.shape(t)[0], 5, 5))

    def _mirror_to_9x9(t):
        # Reflect padding at the trailing side of both grid axes: 5 -> 9.
        return tf.pad(t, [[0, 0], [0, 4], [0, 4]], 'REFLECT')

    def _as_flat_81(t):
        return K.reshape(t, (K.shape(t)[0], 1, 1, 1, 81))

    def _repeat_per_feature(t):
        return K.repeat_elements(t, 4, -1)

    pooled = GlobalAveragePooling3D()(cost_volume)
    squeezed = Lambda(_add_unit_axes)(pooled)

    hidden = Conv3D(170, 1, 1, 'same')(squeezed)
    hidden = Activation('relu')(hidden)

    weights = Conv3D(25, 1, 1, 'same')(hidden)
    weights = Activation('sigmoid')(weights)

    grid = Lambda(_as_5x5)(weights)
    grid = Lambda(_mirror_to_9x9)(grid)
    attention = Lambda(_as_flat_81)(grid)

    expanded = Lambda(_repeat_per_feature)(attention)
    return multiply([expanded, cost_volume]), attention
def _get_135_CostVolume_(inputs):
    """Build a 9-level cost volume from a list of view feature maps.

    For every disparity ``d`` in ``[-4, 4]`` each view is translated by an
    amount proportional to ``d`` and to its offset from the centre index 4;
    the shifted views are concatenated on the channel axis.  The nine
    per-disparity tensors are then stacked on a new axis 1.

    View ``i`` is given the grid coordinates ``v = i // 9 + i`` and
    ``u = 8 - i % 9``.  With nine inputs this is ``(v, u) = (i, 8 - i)``.

    Args:
        inputs: sequence of 4-D tensors.  The final reshape hard-codes
            ``4 * 9`` channels, so presumably there are nine views with four
            feature channels each (TODO confirm against the caller).

    Returns:
        Tensor reshaped to ``(batch, 9, height, width, 36)``.
    """
    ref_shape = K.shape(inputs[0])
    view_count = len(inputs)

    per_disparity = []
    for disp in range(-4, 5):
        if disp == 0:
            # Zero disparity: the views are used unshifted.
            views = [inputs[idx] for idx in range(view_count)]
        else:
            views = []
            for idx in range(view_count):
                row, col = divmod(idx, 9)
                row = row + idx
                col = 8 - col
                offset = [disp * (col - 4), disp * (row - 4)]
                views.append(
                    tf.contrib.image.translate(inputs[idx], offset,
                                               'BILINEAR'))
        per_disparity.append(K.concatenate(views, axis=3))

    volume = K.stack(per_disparity, axis=1)
    target_shape = (ref_shape[0], 9, ref_shape[1], ref_shape[2], 4 * 9)
    return K.reshape(volume, target_shape)
def sampling(args):
    """Draw a sample as ``mean + exp(0.5 * log_var) * eps``.

    Args:
        args: pair ``(z_mean, z_log_var)`` of tensors; the noise is drawn with
            shape ``(batch, dim)`` taken from ``z_mean``.

    Returns:
        Tensor ``z_mean + exp(0.5 * z_log_var) * epsilon`` where ``epsilon``
        comes from ``K.random_normal`` with its default parameters.
    """
    mean, log_var = args
    n_samples = K.shape(mean)[0]
    latent_dim = K.int_shape(mean)[1]

    # random_normal defaults to mean=0 and std=1.0
    noise = K.random_normal(shape=(n_samples, latent_dim))
    std_dev = K.exp(0.5 * log_var)
    return mean + std_dev * noise
def disparityregression(input):
    """Reduce the last axis to a weighted sum of the disparities -4..4.

    The nine values ``linspace(-4, 4, 9)`` are tiled over the first three
    axes of ``input``, multiplied element-wise with it and summed along the
    last axis.

    Args:
        input: 4-D tensor whose last axis has nine entries; presumably a
            per-pixel probability over disparities (TODO confirm).

    Returns:
        Tensor with the last axis summed out.
    """
    dims = K.shape(input)

    levels = K.constant(np.linspace(-4, 4, 9), shape=[9])
    # (9,) -> (1, 1, 1, 9) so it can be tiled across the leading axes.
    for _ in range(3):
        levels = K.expand_dims(levels, 0)
    levels = tf.tile(levels, [dims[0], dims[1], dims[2], 1])

    weighted = multiply([input, levels])
    return K.sum(weighted, -1)
def channel_attention_free(cost_volume):
    """Re-weight a cost volume with 81 independently learned weights.

    The cost volume is globally average-pooled and passed through two 1x1x1
    convolutions (170 filters with ReLU, then 81 filters with sigmoid).  The
    81 weights are reshaped to ``(batch, 1, 1, 1, 81)``, each is repeated
    four times along the last axis, and the result is multiplied with the
    cost volume.

    Args:
        cost_volume: 5-D Keras tensor; presumably its last axis has
            81 * 4 channels (TODO confirm against the caller).

    Returns:
        A tuple ``(weighted_cost_volume, attention)`` where ``attention`` has
        shape ``(batch, 1, 1, 1, 81)``.
    """

    def _add_unit_axes(t):
        # (batch, C) -> (batch, 1, 1, 1, C)
        return K.expand_dims(K.expand_dims(K.expand_dims(t, 1), 1), 1)

    def _as_flat_81(t):
        return K.reshape(t, (K.shape(t)[0], 1, 1, 1, 81))

    def _repeat_per_feature(t):
        return K.repeat_elements(t, 4, -1)

    pooled = GlobalAveragePooling3D()(cost_volume)
    squeezed = Lambda(_add_unit_axes)(pooled)

    hidden = Conv3D(170, 1, 1, 'same')(squeezed)
    hidden = Activation('relu')(hidden)

    weights = Conv3D(81, 1, 1, 'same')(hidden)
    weights = Activation('sigmoid')(weights)

    attention = Lambda(_as_flat_81)(weights)
    expanded = Lambda(_repeat_per_feature)(attention)
    return multiply([expanded, cost_volume]), attention
def to_3d_135(cost_volume_135):
    """Apply two attention stages to a cost volume and regress a cost map.

    Stage 1 (global): the cost volume is average-pooled, squeezed through
    1x1x1 convolutions down to three sigmoid weights, and those three weights
    are spread over nine slots (first weight x4, second x1, third x4), each
    slot then being repeated four times along the last axis.

    Stage 2 (position-wise): the stage-1 weighted volume goes through the
    same conv / sigmoid / spread / repeat pipeline without pooling, and the
    result is multiplied with the *original* cost volume.

    The attended volume is finally passed to ``convbn_3d`` layers, reduced to
    a single channel, squeezed and permuted with ``(0, 2, 3, 1)``.

    Args:
        cost_volume_135: 5-D channels-last tensor; presumably its last axis
            has ``4 * 9`` channels (TODO confirm against the caller).

    Returns:
        A tuple ``(cost3, cv_135_multi)``: the permuted single-channel cost
        and the attention-weighted cost volume.
    """
    feature = 4 * 9
    # Floor division keeps the filter counts integral on Python 3, where
    # ``feature / 2`` would be the float 18.0.
    half_feature = feature // 2
    quarter_feature = feature // 4

    def _add_unit_axes(y):
        # (batch, C) -> (batch, 1, 1, 1, C)
        return K.expand_dims(K.expand_dims(K.expand_dims(y, 1), 1), 1)

    def _spread_3_to_9(y):
        # Three weights -> nine slots: [w0, w0, w0, w0, w1, w2, w2, w2, w2].
        first = y[:, :, :, :, 0:1]
        middle = y[:, :, :, :, 1:2]
        last = y[:, :, :, :, 2:3]
        return K.concatenate([first] * 4 + [middle] + [last] * 4, axis=-1)

    def _repeat_per_feature(y):
        return K.repeat_elements(y, 4, -1)

    # ---- Stage 1: global channel attention -------------------------------
    channel_135 = GlobalAveragePooling3D(
        data_format='channels_last')(cost_volume_135)
    channel_135 = Lambda(_add_unit_axes)(channel_135)
    channel_135 = Conv3D(half_feature, 1, 1, 'same',
                         data_format='channels_last')(channel_135)
    channel_135 = Activation('relu')(channel_135)
    channel_135 = Conv3D(3, 1, 1, 'same',
                         data_format='channels_last')(channel_135)
    channel_135 = Activation('sigmoid')(channel_135)
    channel_135 = Lambda(_spread_3_to_9)(channel_135)
    channel_135 = Lambda(
        lambda y: K.reshape(y, (K.shape(y)[0], 1, 1, 1, 9)))(channel_135)
    channel_135 = Lambda(_repeat_per_feature)(channel_135)

    # ---- Stage 2: position-wise attention --------------------------------
    cv_135_tmp = multiply([channel_135, cost_volume_135])
    cv_135_tmp = Conv3D(half_feature, 1, 1, 'same',
                        data_format='channels_last')(cv_135_tmp)
    cv_135_tmp = Activation('relu')(cv_135_tmp)
    cv_135_tmp = Conv3D(3, 1, 1, 'same',
                        data_format='channels_last')(cv_135_tmp)
    cv_135_tmp = Activation('sigmoid')(cv_135_tmp)
    attention_135 = Lambda(_spread_3_to_9)(cv_135_tmp)
    attention_135 = Lambda(_repeat_per_feature)(attention_135)
    cv_135_multi = multiply([attention_135, cost_volume_135])

    # ---- Cost regression --------------------------------------------------
    # NOTE(review): every convbn_3d call below takes ``cv_135_multi`` as its
    # input, so only the ``quarter_feature`` layer actually feeds the output;
    # the three layers before it are created but their results are
    # overwritten.  This looks like it was meant to be a sequential stack.
    # The wiring is left untouched here because changing it would alter the
    # architecture and break compatibility with existing weights -- confirm
    # the intent before rewiring.
    dres3 = convbn_3d(cv_135_multi, feature, 3, 1)
    dres3 = Activation('relu')(dres3)
    dres3 = convbn_3d(cv_135_multi, half_feature, 3, 1)
    dres3 = Activation('relu')(dres3)
    dres3 = convbn_3d(cv_135_multi, half_feature, 3, 1)
    dres3 = Activation('relu')(dres3)
    dres3 = convbn_3d(cv_135_multi, quarter_feature, 3, 1)
    dres3 = Activation('relu')(dres3)
    dres3 = convbn_3d(dres3, 1, 3, 1)
    cost3 = Activation('relu')(dres3)

    # Drop the singleton channel axis, then move axis 1 to the end.
    cost3 = Lambda(
        lambda x: K.permute_dimensions(K.squeeze(x, -1), (0, 2, 3, 1)))(cost3)
    return cost3, cv_135_multi
def channel_attention(cost_volume):
    """Re-weight a cost volume with a symmetric, mirrored 9x9 attention map.

    The cost volume is globally average-pooled and passed through two 1x1x1
    convolutions (170 filters with ReLU, then 15 filters with sigmoid).  The
    15 weights are the upper triangle of a symmetric 5x5 grid; they are
    expanded to all 25 entries, reflect-padded to 9x9, flattened to 81
    weights and repeated four times along the last axis before being
    multiplied with the cost volume.

    Args:
        cost_volume: 5-D Keras tensor; presumably its last axis has
            81 * 4 channels (TODO confirm against the caller).

    Returns:
        A tuple ``(weighted_cost_volume, attention)`` where ``attention`` has
        shape ``(batch, 1, 1, 1, 81)``.
    """
    # Index layout of the 15 learned weights (upper triangle):
    #   0  1  2  3  4
    #      5  6  7  8
    #         9 10 11
    #           12 13
    #              14
    # Symmetric 5x5 grid built from them, row by row:
    #   0  1  2  3  4
    #   1  5  6  7  8
    #   2  6  9 10 11
    #   3  7 10 12 13
    #   4  8 11 13 14
    # Each (start, stop) pair below is one contiguous run of that grid.
    symmetric_runs = (
        (0, 5),
        (1, 2), (5, 9),
        (2, 3), (6, 7), (9, 12),
        (3, 4), (7, 8), (10, 11), (12, 14),
        (4, 5), (8, 9), (11, 12), (13, 15),
    )

    def _add_unit_axes(t):
        # (batch, C) -> (batch, 1, 1, 1, C)
        return K.expand_dims(K.expand_dims(K.expand_dims(t, 1), 1), 1)

    def _expand_15_to_25(t):
        pieces = [t[:, :, :, :, lo:hi] for lo, hi in symmetric_runs]
        return K.concatenate(pieces, axis=-1)

    def _as_5x5(t):
        return K.reshape(t, (K.shape(t)[0], 5, 5))

    def _mirror_to_9x9(t):
        # Reflect padding at the trailing side of both grid axes: 5 -> 9.
        return tf.pad(t, [[0, 0], [0, 4], [0, 4]], 'REFLECT')

    def _as_flat_81(t):
        return K.reshape(t, (K.shape(t)[0], 1, 1, 1, 81))

    def _repeat_per_feature(t):
        return K.repeat_elements(t, 4, -1)

    pooled = GlobalAveragePooling3D()(cost_volume)
    squeezed = Lambda(_add_unit_axes)(pooled)

    hidden = Conv3D(170, 1, 1, 'same')(squeezed)
    hidden = Activation('relu')(hidden)

    weights = Conv3D(15, 1, 1, 'same')(hidden)  # [B, 1, 1, 1, 15]
    weights = Activation('sigmoid')(weights)

    grid = Lambda(_expand_15_to_25)(weights)
    grid = Lambda(_as_5x5)(grid)
    grid = Lambda(_mirror_to_9x9)(grid)
    attention = Lambda(_as_flat_81)(grid)

    expanded = Lambda(_repeat_per_feature)(attention)
    return multiply([expanded, cost_volume]), attention
def call(self, x, mask=None):
    """Crop each region of interest from the image and resize it.

    For every ROI index below ``self.num_rois`` the box ``(x, y, w, h)`` is
    read from ``rois[0, roi_idx, :]``, cast to ``int32``, used to slice the
    image, and the crop is resized to ``(pool_size, pool_size)`` with
    ``tf.image.resize_images``.

    Args:
        x: two-element sequence ``[img, rois]``.  ``img`` is indexed as
            ``[:, rows, cols, :]`` and ``rois`` as ``[0, roi_idx, 0..3]``.
        mask: unused.

    Returns:
        Tensor of shape ``(1, num_rois, pool_size, pool_size, nb_channels)``.
    """
    assert (len(x) == 2)
    img = x[0]
    rois = x[1]

    # NOTE: the RoiPooling implementation differs between theano and
    # tensorflow due to the lack of a resize op in theano. The theano
    # implementation is much less efficient and leads to long compile times.
    pooled_size = (self.pool_size, self.pool_size)
    outputs = []
    for roi_idx in range(self.num_rois):
        # Separate names so the ``x`` argument is not rebound inside the loop.
        roi_x = K.cast(rois[0, roi_idx, 0], 'int32')
        roi_y = K.cast(rois[0, roi_idx, 1], 'int32')
        roi_w = K.cast(rois[0, roi_idx, 2], 'int32')
        roi_h = K.cast(rois[0, roi_idx, 3], 'int32')

        crop = img[:, roi_y:roi_y + roi_h, roi_x:roi_x + roi_w, :]
        outputs.append(tf.image.resize_images(crop, pooled_size))

    final_output = K.concatenate(outputs, axis=0)
    # The former permute_dimensions(..., (0, 1, 2, 3, 4)) was the identity
    # permutation, so the reshape result is returned directly.
    final_output = K.reshape(
        final_output,
        (1, self.num_rois, self.pool_size, self.pool_size, self.nb_channels))
    return final_output
def UpSampling3DBilinear_(x, size):
    """Bilinearly resize axes 2 and 3 of a 5-D tensor.

    Axes 0 and 1 are merged so that ``tf.image.resize_bilinear`` (with
    ``align_corners=True``) can treat the input as a batch of 2-D images;
    the two axes are split apart again afterwards.

    Args:
        x: 5-D tensor.
        size: pair ``(new_dim2, new_dim3)`` giving the output size of axes
            2 and 3.

    Returns:
        Tensor of shape ``(d0, d1, size[0], size[1], d4)``.
    """
    dims = K.shape(x)
    n_outer, n_inner, n_channels = dims[0], dims[1], dims[4]

    flat = K.reshape(x, (n_outer * n_inner, dims[2], dims[3], n_channels))
    resized = tf.image.resize_bilinear(flat, size, align_corners=True)
    return K.reshape(resized,
                     (n_outer, n_inner, size[0], size[1], n_channels))
def main(_):
    """Run multi-scale gradient ascent on an image with InceptionV3.

    A loss is built from the squared activations of selected ``mixed*``
    layers, and the input image is pushed along the normalised gradient of
    that loss at ``NUM_OCTAVES`` successively larger scales.  An image is
    saved after every scale and once more at the end as ``dream.png``.

    Args:
        _: unused.
    """
    # disable all training specific operations
    K.set_learning_phase(0)

    model = applications.inception_v3.InceptionV3(weights='imagenet',
                                                  include_top=False)

    # Weight of each layer's activation in the loss.
    layer_contributions = {
        'mixed2': 0.2,
        'mixed3': 3.0,
        'mixed4': 2.0,
        'mixed5': 1.5
    }
    layer_dict = {layer.name: layer for layer in model.layers}

    loss = K.variable(0.)
    for layer_name, coeff in layer_contributions.items():
        activation = layer_dict[layer_name].output
        scaling = K.prod(K.cast(K.shape(activation), 'float32'))
        # avoid artifacts by only involving non-border pixels
        # ``loss = loss + ...`` and not ``loss += ...``: in-place addition
        # on a backend variable is deprecated and raises on resource
        # variables, whereas this form always yields a new tensor.
        loss = loss + (
            coeff * K.sum(K.square(activation[:, 2:-2, 2:-2, :])) / scaling)

    # start the gradient-ascent process
    dream = model.input
    grads = K.gradients(loss, dream)[0]
    # trick: normalize gradients
    grads /= K.maximum(K.mean(K.abs(grads)), 1e-7)
    fetch_loss_and_grads = K.function(inputs=[dream], outputs=[loss, grads])

    def gradient_ascent(x, iterations, step_rate, max_loss=None):
        """Take up to ``iterations`` ascent steps, stopping past ``max_loss``."""
        for i in range(iterations):
            loss_value, grads_value = fetch_loss_and_grads([x])
            if max_loss is not None and loss_value > max_loss:
                break
            print('@{:4d}: {:.4f}'.format(i, loss_value))
            x += step_rate * grads_value
        return x

    img = preprocess_img(FLAGS.img_path)
    original_shape = img.shape[1:3]

    # Shapes from the smallest octave up to the original size.
    successive_shapes = [original_shape]
    for i in range(1, NUM_OCTAVES):
        shape = tuple(
            int(dim / (OCTAVES_SCLAE ** i)) for dim in original_shape)
        successive_shapes.append(shape)
    successive_shapes = successive_shapes[::-1]

    original_img = np.copy(img)
    shrunk_original_img = resize_img(img, successive_shapes[0])

    for shape in successive_shapes:
        print('Preprocess image with shape: {}'.format(shape))
        img = resize_img(img, shape)
        img = gradient_ascent(img,
                              iterations=FLAGS.iterations,
                              step_rate=FLAGS.step_rate,
                              max_loss=MAX_LOSS)

        same_size_original = resize_img(original_img, shape)
        if FLAGS.repair_lost_detail:
            # Re-inject the difference between the original at this scale
            # and the upscaled version of the previous scale.
            upscale_shrunk_original_img = resize_img(shrunk_original_img,
                                                     shape)
            lost_detail = same_size_original - upscale_shrunk_original_img
            img += lost_detail
        shrunk_original_img = same_size_original

        save_img(img, filename='dream_at_scale_{}.png'.format(str(shape)))

    save_img(img, filename='dream.png')