def conv(ix, w):
    """Stride-1, SAME-padded 2-D convolution built from image patches.

    Every output pixel is the dot product between the flattened filter and the
    patch of `ix` centred on that pixel, so the whole convolution is a single
    contraction between the patch tensor and the flattened filter bank.

    Args:
        ix: 4-D tensor whose dimensions 1, 2 and 3 (used here as height, width
            and channels) are statically known.
        w: 4-D filter tensor with static shape
            [filter_height, filter_width, in_channels, out_channels].

    Returns:
        Tensor of shape [batch, ix_height, ix_width, out_channels].

    Raises:
        ValueError: if the channel count of `ix` differs from the filter's
            `in_channels`.
    """
    filter_height = int(w.shape[0])
    filter_width = int(w.shape[1])
    in_channels = int(w.shape[2])
    out_channels = int(w.shape[3])

    ix_height = int(ix.shape[1])
    ix_width = int(ix.shape[2])
    ix_channels = int(ix.shape[3])

    if ix_channels != in_channels:
        raise ValueError(
            "input has %d channels but the filter expects %d."
            % (ix_channels, in_channels))

    patch_depth = filter_height * filter_width * in_channels

    # One column per output channel.
    flat_w = tf.reshape(w, [patch_depth, out_channels])

    patches = tf.extract_image_patches(
        ix,
        ksizes=[1, filter_height, filter_width, 1],
        strides=[1, 1, 1, 1],
        rates=[1, 1, 1, 1],
        padding='SAME')
    patches_reshaped = tf.reshape(
        patches, [-1, ix_height, ix_width, patch_depth])

    # Contract the patch axis against the filter rows in one op instead of
    # building a multiply/reduce_sum pair per output channel and concatenating.
    return tf.tensordot(patches_reshaped, flat_w, axes=[[3], [0]])
def compute_cost_volume(x1, x2, H, W, channel, d=9):
    """Correlate each pixel of `x1` with a d x d neighbourhood of `x2`.

    Both inputs are L2-normalised along axis 3 first, so every entry of the
    result is a dot product of two unit-length feature vectors.

    Args:
        x1: feature map, reshaped here to [-1, H, W, 1, 1, channel].
        x2: feature map from which d x d patches are extracted with stride 1
            and 'SAME' padding.
        H: height used for the reshapes.
        W: width used for the reshapes.
        channel: number of feature channels used for the reshapes.
        d: side length of the square search window.

    Returns:
        Tensor of shape [-1, H, W, d * d].
    """
    reference = tf.nn.l2_normalize(x1, axis=3)
    target = tf.nn.l2_normalize(x2, axis=3)

    window = [1, d, d, 1]
    neighbourhoods = tf.reshape(
        tf.extract_image_patches(
            target,
            window,
            strides=[1, 1, 1, 1],
            rates=[1, 1, 1, 1],
            padding='SAME'),
        [-1, H, W, d, d, channel])

    # Singleton window axes let the reference vector broadcast over the
    # d x d neighbourhood.
    anchors = tf.reshape(reference, [-1, H, W, 1, 1, channel])

    # Dot product over the channel axis (a sum, not a mean).
    similarity = tf.reduce_sum(tf.multiply(anchors, neighbourhoods), axis=-1)
    return tf.reshape(similarity, [-1, H, W, d * d])
def extract_pointwise_conv2d_patches(inputs, filter_shape, name=None, data_format=None):
    """Extract patches for a 1x1 conv2d.

    Args:
        inputs: 4-D Tensor of shape [batch_size, height, width, in_channels].
            The batch and spatial dimensions may be statically unknown.
        filter_shape: List of 4 ints. Shape of filter to apply with conv2d().
        name: None or str. Name for Op.
        data_format: None or str. Format for data. See 'data_format' in
            tf.nn.conv2d() for details.

    Returns:
        Tensor of shape [batch_size, ..spatial_input_shape..,
        ..spatial_filter_shape.., in_channels].

    Raises:
        ValueError: if inputs is not 4-D.
        ValueError: if filter_shape is not [1, 1, ?, ?].
        ValueError: if data_format is not channels-last.
    """
    if inputs.shape.ndims != 4:
        raise ValueError("inputs must have 4 dims.")
    if len(filter_shape) != 4:
        raise ValueError("filter_shape must have 4 dims.")
    if filter_shape[0] != 1 or filter_shape[1] != 1:
        raise ValueError("filter_shape must have shape 1 along spatial dimensions.")
    if not is_data_format_channel_last(data_format):
        raise ValueError("data_format must be channels last.")

    with tf.name_scope(name, "extract_pointwise_conv2d_patches",
                       [inputs, filter_shape]):
        ksizes = [1, 1, 1, 1]  # Spatial shape is 1x1.
        strides = [1, 1, 1, 1]  # Operate on all pixels.
        rates = [1, 1, 1, 1]  # Dilation has no meaning with spatial shape = 1.
        padding = "VALID"  # Doesn't matter.
        result = tf.extract_image_patches(inputs, ksizes, strides, rates, padding)

        # A static dimension may be None (e.g. a placeholder batch size), and
        # tf.reshape rejects None in a shape list. Fall back to the runtime
        # shape only for the dimensions that are unknown, so the fully static
        # case builds exactly the same graph as before.
        leading_dims = inputs.shape.as_list()[:3]
        if None in leading_dims:
            runtime_shape = tf.shape(inputs)
            leading_dims = [
                runtime_shape[axis] if dim is None else dim
                for axis, dim in enumerate(leading_dims)
            ]
        batch_size, input_height, input_width = leading_dims

        filter_height, filter_width, in_channels, _ = filter_shape
        return tf.reshape(result, [
            batch_size, input_height, input_width, filter_height, filter_width,
            in_channels
        ])
def forward(self):
    """Tile the input into non-overlapping s x s blocks, s being the layer stride.

    Reads `self.inp.out` and `self.lay.stride`, and stores the extracted
    patches in `self.out`.
    """
    source = self.inp.out
    step = self.lay.stride
    self.out = tf.extract_image_patches(
        images=source,
        ksizes=[1, step, step, 1],
        strides=[1, step, step, 1],
        rates=[1, 1, 1, 1],
        padding='VALID')
def conv_capsule_mat(input_tensor, input_activation, input_dim, output_dim, layer_name, num_routing=3, num_in_atoms=3, num_out_atoms=3, stride=2, kernel_size=5, min_var=0.0005, final_beta=1.0):
    """Convolutional Capsule layer with Pose Matrices.

    Extracts kernel_size x kernel_size patches (stride `stride`, 'VALID'
    padding) from the input poses and activations, multiplies the pose patches
    by a learned kernel to obtain votes, and passes votes and activation
    patches to `update_conv_routing` (defined outside this block).

    Prints several debugging lines to stdout while the graph is being built.

    Args:
        input_tensor: rank-5 pose tensor; the last two dimensions are used as
            height and width. NOTE(review): the reshapes below imply
            [batch, input_dim, num_in_atoms**2, height, width] -- confirm
            against the caller.
        input_activation: activations, reshaped here to
            [batch * input_dim, height, width, 1].
        input_dim: number of input capsule types.
        output_dim: number of output capsule types.
        layer_name: name of the variable scope holding the layer variables.
        num_routing: forwarded to `update_conv_routing`.
        num_in_atoms: side length of the input pose matrix.
        num_out_atoms: side length of the output pose matrix.
        stride: spatial stride of the patch extraction.
        kernel_size: spatial size of the square patches.
        min_var: forwarded to `update_conv_routing`.
        final_beta: forwarded to `update_conv_routing`.

    Returns:
        Tuple `(tf.sigmoid(out_activation), out_center)` where both tensors
        are the routing results with their singleton axes squeezed out.
    """
    print('caps conv stride: {}'.format(stride))
    in_atom_sq = num_in_atoms * num_in_atoms
    with tf.variable_scope(layer_name):
        # Dynamic shape supplies the batch size; static shape supplies the
        # spatial size used for the output-size arithmetic below.
        input_shape = tf.shape(input_tensor)
        _, _, _, in_height, in_width = input_tensor.get_shape()
        # This Variable will hold the state of the weights for the layer
        kernel = utils.weight_variable(shape=[
            input_dim, kernel_size, kernel_size, num_in_atoms,
            output_dim * num_out_atoms
        ], stddev=0.3)
        # kernel = tf.clip_by_norm(kernel, 3.0, axes=[1, 2, 3])
        activation_biases = utils.bias_variable(
            [1, 1, output_dim, 1, 1, 1, 1, 1],
            init_value=0.5,
            name='activation_biases')
        sigma_biases = utils.bias_variable([1, 1, output_dim, 1, 1, 1, 1, 1],
                                           init_value=.5,
                                           name='sigma_biases')
        with tf.name_scope('conv'):
            print('convi;')
            # input_tensor: [x,128,8, c1,c2] -> [x*128,8, c1,c2]
            # NOTE(review): the shape note above looks inherited from another
            # layer; the reshape below folds batch, capsule type and pose
            # entry into the leading axis and adds a single channel.
            print(input_tensor.get_shape())
            input_tensor_reshaped = tf.reshape(input_tensor, [
                input_shape[0] * input_dim * in_atom_sq, input_shape[3],
                input_shape[4], 1
            ])
            # Restore the static spatial size lost by the dynamic reshape.
            input_tensor_reshaped.set_shape((None, input_tensor.get_shape()[3],
                                             input_tensor.get_shape()[4], 1))
            input_act_reshaped = tf.reshape(input_activation, [
                input_shape[0] * input_dim, input_shape[3], input_shape[4], 1
            ])
            input_act_reshaped.set_shape((None, input_tensor.get_shape()[3],
                                          input_tensor.get_shape()[4], 1))
            print(input_tensor_reshaped.get_shape())
            # conv: [x*128,out*out_at, c3,c4]
            conv_patches = tf.extract_image_patches(
                images=input_tensor_reshaped,
                ksizes=[1, kernel_size, kernel_size, 1],
                strides=[1, stride, stride, 1],
                rates=[1, 1, 1, 1],
                padding='VALID',
            )
            act_patches = tf.extract_image_patches(
                images=input_act_reshaped,
                ksizes=[1, kernel_size, kernel_size, 1],
                strides=[1, stride, stride, 1],
                rates=[1, 1, 1, 1],
                padding='VALID',
            )
            # Output size of a 'VALID' sliding window.
            o_height = (in_height - kernel_size) // stride + 1
            o_width = (in_width - kernel_size) // stride + 1
            patches = tf.reshape(conv_patches,
                                 (input_shape[0], input_dim, in_atom_sq,
                                  o_height, o_width, kernel_size, kernel_size))
            patches.set_shape((None, input_dim, in_atom_sq, o_height, o_width,
                               kernel_size, kernel_size))
            # -> (input_dim, k, k, batch, o_height, o_width, in_atom_sq), so
            # the leading axes line up with the kernel's leading axes.
            patch_trans = tf.transpose(patches, [1, 5, 6, 0, 3, 4, 2])
            # Split each flattened pose into rows of num_in_atoms entries so
            # it can be matrix-multiplied with the kernel.
            patch_split = tf.reshape(
                patch_trans,
                (input_dim, kernel_size, kernel_size,
                 input_shape[0] * o_height * o_width * num_in_atoms,
                 num_in_atoms))
            patch_split.set_shape(
                (input_dim, kernel_size, kernel_size, None, num_in_atoms))
            a_patches = tf.reshape(act_patches,
                                   (input_shape[0], input_dim, 1, 1, o_height,
                                    o_width, kernel_size, kernel_size))
            a_patches.set_shape((None, input_dim, 1, 1, o_height, o_width,
                                 kernel_size, kernel_size))
        with tf.name_scope('input_act'):
            # Summarise the activation summed over input capsule types and
            # over both kernel axes.
            utils.activation_summary(
                tf.reduce_sum(tf.reduce_sum(tf.reduce_sum(a_patches, axis=1),
                                            axis=-1),
                              axis=-1))
        with tf.name_scope('Wx'):
            # Votes: pose patches times the kernel, batched over the leading
            # (input_dim, k, k) axes.
            wx = tf.matmul(patch_split, kernel)
            wx = tf.reshape(wx, (input_dim, kernel_size, kernel_size,
                                 input_shape[0], o_height, o_width,
                                 num_in_atoms * num_out_atoms, output_dim))
            wx.set_shape(
                (input_dim, kernel_size, kernel_size, None, o_height, o_width,
                 num_in_atoms * num_out_atoms, output_dim))
            # -> (batch, input_dim, output_dim, atoms, o_height, o_width, k, k)
            wx = tf.transpose(wx, [3, 0, 7, 6, 4, 5, 1, 2])
            utils.activation_summary(wx)
        with tf.name_scope('routing'):
            # Routing
            # logits: [x, 128, 10, c3, c4]
            logit_shape = [
                input_dim, output_dim, 1, o_height, o_width, kernel_size,
                kernel_size
            ]
            activation, center = update_conv_routing(
                wx=wx,
                input_activation=a_patches,
                activation_biases=activation_biases,
                sigma_biases=sigma_biases,
                logit_shape=logit_shape,
                num_out_atoms=num_out_atoms * num_out_atoms,
                input_dim=input_dim,
                num_routing=num_routing,
                output_dim=output_dim,
                min_var=min_var,
                final_beta=final_beta,
            )
        # activations: [x, 10, 8, c3, c4]
        # Drop the singleton axes left over from routing.
        out_activation = tf.squeeze(activation, axis=[1, 3, 6, 7])
        out_center = tf.squeeze(center, axis=[1, 6, 7])
        with tf.name_scope('center'):
            utils.activation_summary(out_center)
        return tf.sigmoid(out_activation), out_center
def _body(i, posterior, activation, center, masses):
    """Run one EM routing iteration (the body of the while loop).

    Besides its arguments, this closure reads `final_beta`,
    `input_activation`, `wx`, `min_var`, `sigma_biases`, `activation_biases`,
    `num_out_atoms`, `input_dim`, `output_dim`, `n` and `k` from the enclosing
    scope. NOTE(review): `n` and `k` appear to be the output spatial size and
    the kernel size -- confirm in the enclosing function.

    Args:
        i: zero-based iteration counter.
        posterior: current routing posteriors.
        activation: ignored; the logit is recomputed from scratch.
        center: capsule centers from the previous iteration.
        masses: overwritten before being read.

    Returns:
        Tuple `(posterior, logit, center, masses)` for the next iteration.
    """
    del activation

    def _pool_votes(tensor):
        # Sum over axis 1, then the last axis, then the second-to-last,
        # keeping every reduced axis as a singleton.
        pooled = tf.reduce_sum(tensor, axis=1, keep_dims=True)
        pooled = tf.reduce_sum(pooled, axis=-1, keep_dims=True)
        return tf.reduce_sum(pooled, axis=-2, keep_dims=True)

    # Inverse-temperature schedule: grows towards final_beta with i.
    step = tf.cast(i + 1, tf.float32)
    beta = final_beta * (1 - tf.pow(0.95, step))

    # M-step: vote weights, total mass, center and variance per output capsule.
    vote_conf = posterior * input_activation
    masses = _pool_votes(vote_conf) + 0.0000001
    weighted_votes = vote_conf * wx
    # Blend of the new weighted mean (0.9) and the previous center (0.1).
    center = .9 * _pool_votes(weighted_votes) / masses + .1 * center
    residual = wx - center
    noise = residual * residual
    variance = min_var + _pool_votes(vote_conf * noise) / masses

    p_i = -1 * tf.reduce_sum(tf.log(variance), axis=3, keep_dims=True)
    log_2pi = tf.log(2 * math.pi)
    win = masses * (p_i - sigma_biases * num_out_atoms * (log_2pi + 1.0))
    logit = beta * (win - activation_biases * 5000)

    # Equals log(sigmoid(logit)), written in a numerically stable form.
    activation_update = tf.minimum(
        0.0, logit) - tf.log(1 + tf.exp(-tf.abs(logit)))

    # E-step: unnormalised log-posterior of every vote.
    log_det_sigma = -1 * p_i
    sigma_update = (num_out_atoms * log_2pi + log_det_sigma) / 2.0
    exp_update = tf.reduce_sum(noise / (2 * variance), axis=3, keep_dims=True)
    prior_update = activation_update - sigma_update - exp_update

    # Subtract the maximum over the last four axes before exponentiating.
    max_prior_update = prior_update
    for axis in (-1, -2, -3, -4):
        max_prior_update = tf.reduce_max(
            max_prior_update, axis=axis, keep_dims=True)
    prior_normal = tf.add(prior_update, -1 * max_prior_update)
    prior_exp = tf.exp(prior_normal)

    # Lay the exponentiated priors out as single-channel images so the
    # normaliser can be gathered with two patch extractions.
    t_prior = tf.transpose(prior_exp, [0, 1, 2, 3, 4, 6, 5, 7])
    c_prior = tf.reshape(t_prior, [-1, n * k, n * k, 1])
    halo = (k - 1) * (k - 1)
    pad_prior = tf.pad(
        c_prior, [[0, 0], [halo, halo], [halo, halo], [0, 0]], 'CONSTANT')
    patch_prior = tf.extract_image_patches(
        images=pad_prior,
        ksizes=[1, k, k, 1],
        strides=[1, k, k, 1],
        rates=[1, k - 1, k - 1, 1],
        padding='VALID')
    sum_prior = tf.reduce_sum(patch_prior, axis=-1, keep_dims=True)
    sum_prior_patch = tf.extract_image_patches(
        images=sum_prior,
        ksizes=[1, k, k, 1],
        strides=[1, 1, 1, 1],
        rates=[1, 1, 1, 1],
        padding='VALID')
    normaliser = tf.reshape(
        sum_prior_patch,
        [-1, input_dim, output_dim, 1, n, n, k, k]) + 0.0000001

    updated_posterior = prior_exp / normaliser
    return (updated_posterior, logit, center, masses)
def forward_pass_with_file_inputs(self, x):
    """Run the network on a list of image files and return all outputs.

    Images are parsed with `self._parse_images`, batched through a queue and
    fed to `self.forward_pass`. With patching enabled, each image is
    centre-cropped to the largest multiple of the patch size, split into
    non-overlapping patches, and the per-patch outputs are stitched back into
    one full image.

    Args:
        x: list of inputs accepted by `self._parse_images`; only its length
            is used directly here.

    Returns:
        NumPy array with one row per entry of `x`, of shape
        [len(x), height, width, 1], where height/width are the cropped size
        when patching and the image size otherwise. Rows produced by padding
        the last batch are dropped.
    """
    with self._graph.as_default():
        if self._with_patching:
            # We want the largest multiple of patch height/width that is no
            # larger than the original image height/width.
            patch_height = self._patch_height
            patch_width = self._patch_width
            final_height = (self._image_height // patch_height) * patch_height
            final_width = (self._image_width // patch_width) * patch_width
            # Halve the leftover so the crop is centred.
            offset_height = (self._image_height - final_height) // 2
            offset_width = (self._image_width - final_width) // 2
            output_height, output_width = final_height, final_width
        else:
            output_height, output_width = self._image_height, self._image_width

        num_batches, leftover = divmod(len(x), self._batch_size)
        overrun = 0
        if leftover != 0:
            # The last batch is full-sized; remember how many extra rows it
            # carries so they can be dropped at the end.
            num_batches += 1
            overrun = self._batch_size - leftover

        # self.load_images_from_list(x) no longer parses or batches the
        # images, so both steps are forced here.
        self._parse_images(x)
        x_test = tf.train.batch([self._all_images],
                                batch_size=self._batch_size,
                                num_threads=self._num_threads)
        x_test = tf.reshape(x_test, shape=[
            -1, self._image_height, self._image_width, self._image_depth
        ])

        if self._with_patching:
            x_test = tf.image.crop_to_bounding_box(
                x_test, offset_height, offset_width, final_height, final_width)
            # Split each image into slices of size patch_height x patch_width.
            ksizes = [1, patch_height, patch_width, 1]
            strides = [1, patch_height, patch_width, 1]
            rates = [1, 1, 1, 1]
            x_test = tf.extract_image_patches(x_test, ksizes, strides, rates,
                                              "VALID")
            x_test = tf.reshape(
                x_test,
                shape=[-1, patch_height, patch_width, self._image_depth])

        if self._load_from_saved:
            self.load_state()

        self._initialize_queue_runners()

        # Run model on them
        x_pred = self.forward_pass(x_test, deterministic=True)

        # Collect pieces in a list and concatenate once, instead of growing
        # an array with np.append (which copies everything on every call).
        # The zero-row float64 seed keeps the dtype promotion and the
        # [*, height, width, 1] shape check of the previous np.empty-seeded
        # accumulator, and makes an empty `x` yield an empty result.
        pieces = [np.empty([0, output_height, output_width, 1])]

        if self._with_patching:
            num_patch_rows = final_height // patch_height
            num_patch_cols = final_width // patch_width

            for _ in range(num_batches):
                xx = self._session.run(x_pred)
                # Each split holds the patches of one image, in row-major
                # order.
                for img in np.array_split(xx, self._batch_size):
                    # Join the patches of each row horizontally (axis=1),
                    # then stack the rows vertically (axis=0).
                    img_rows = [
                        np.concatenate(
                            [img[row * num_patch_cols + col]
                             for col in range(num_patch_cols)],
                            axis=1)
                        for row in range(num_patch_rows)
                    ]
                    full_img = np.concatenate(img_rows, axis=0)
                    # Add the leading batch axis expected by the output.
                    pieces.append(full_img[np.newaxis])
        else:
            for _ in range(num_batches):
                pieces.append(self._session.run(x_pred))

        total_outputs = np.concatenate(pieces, axis=0)

        # Drop any outputs which are overruns from the last batch.
        if overrun:
            total_outputs = total_outputs[:-overrun]

    return total_outputs