def forward(self, x):
    mc = self.mc
    batch, in_channel, zenith, azimuth = list(x.size())
    size_z, size_a = mc.LCN_HEIGHT, mc.LCN_WIDTH

    # Gather every non-center neighbor's channels into separate output
    # channels, so per-neighbor differences reduce to strided slicing.
    condensing_kernel = torch.from_numpy(
        util.condensing_matrix(in_channel, size_z, size_a)).float()
    condensed_input = F.conv2d(x, weight=condensing_kernel.to(device),
                               stride=self.stride, padding=self.padding)

    # Per-neighbor coordinate differences; channels 0, 1, 2 of x hold the
    # x, y, z coordinates of each lidar point.
    diff_x = x[:, 0, :, :].view(batch, 1, zenith, azimuth) \
        - condensed_input[:, 0::in_channel, :, :]
    diff_y = x[:, 1, :, :].view(batch, 1, zenith, azimuth) \
        - condensed_input[:, 1::in_channel, :, :]
    diff_z = x[:, 2, :, :].view(batch, 1, zenith, azimuth) \
        - condensed_input[:, 2::in_channel, :, :]

    # Gaussian bilateral weights, one set per class with its own theta_r.
    bi_filters = []
    for cls in range(mc.NUM_CLASS):
        theta_r = mc.BILATERAL_THETA_R[cls]
        bi_filter = torch.exp(
            -(diff_x**2 + diff_y**2 + diff_z**2) / 2 / theta_r**2)
        bi_filters.append(bi_filter)
    bf_weight = torch.stack(bi_filters)
    bf_weight = bf_weight.transpose(0, 1)
    return bf_weight
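# util.condensing_matrix is not shown in this excerpt. Below is a minimal
# sketch of what it plausibly builds, inferred from how its result is
# consumed above: an F.conv2d weight of shape [out_channels, in_channels,
# kH, kW] whose output interleaves neighbors and channels. The repository's
# actual implementation may differ.
import numpy as np

def condensing_matrix(in_channel, size_z, size_a):
    assert size_z % 2 == 1 and size_a % 2 == 1, 'filter sizes must be odd'
    center = (size_z * size_a) // 2  # flat index of the center tap
    out_channels = (size_z * size_a - 1) * in_channel
    weight = np.zeros((out_channels, in_channel, size_z, size_a),
                      dtype=np.float32)
    n = 0  # neighbor index, skipping the center
    for z in range(size_z):
        for a in range(size_a):
            if z * size_a + a == center:
                continue  # a position sends no message to itself
            for ch in range(in_channel):
                # Output channel n*in_channel + ch copies channel ch of the
                # n-th neighbor, so out[:, ch::in_channel] slices one input
                # channel across all neighbors, as the forward pass does.
                weight[n * in_channel + ch, ch, z, a] = 1.0
            n += 1
    return weight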
def forward(self, x, lidar_mask, bilateral_filters):
    mc = self.mc
    size_z, size_a = mc.LCN_HEIGHT, mc.LCN_WIDTH

    # Initialize compatibility matrices as (1 - identity): differing labels
    # at neighboring positions are penalized, identical labels are not.
    compat_kernel_init = torch.from_numpy(
        np.reshape(
            np.ones((mc.NUM_CLASS, mc.NUM_CLASS), dtype="float32")
            - np.identity(mc.NUM_CLASS, dtype="float32"),
            [mc.NUM_CLASS, mc.NUM_CLASS, 1, 1]))
    bi_compat_kernel = compat_kernel_init * mc.BI_FILTER_COEF
    bi_compat_kernel.requires_grad_()
    angular_compat_kernel = compat_kernel_init * mc.ANG_FILTER_COEF
    angular_compat_kernel.requires_grad_()

    condensing_kernel = torch.from_numpy(
        util.condensing_matrix(mc.NUM_CLASS, size_z, size_a)).float()
    angular_filters = torch.from_numpy(
        util.angular_filter_kernel(mc.NUM_CLASS, size_z, size_a,
                                   mc.ANG_THETA_A**2)).float()
    bi_angular_filters = torch.from_numpy(
        util.angular_filter_kernel(mc.NUM_CLASS, size_z, size_a,
                                   mc.BILATERAL_THETA_A**2)).float()

    # Move kernels and filters to the GPU.
    (bi_compat_kernel, angular_compat_kernel, condensing_kernel,
     angular_filters, bi_angular_filters) = (
         bi_compat_kernel.to(device), angular_compat_kernel.to(device),
         condensing_kernel.to(device), angular_filters.to(device),
         bi_angular_filters.to(device))

    for it in range(mc.RCRF_ITER):
        # Unary term: softmax over the class (channel) dimension. x is NCHW
        # here, so this is dim=1, not dim=-1 as in the NHWC TensorFlow code.
        unary = F.softmax(x, dim=1)

        # Message passing with angular and bilateral filters.
        ang_output, bi_output = self.locally_connected_layer(
            unary, lidar_mask, bilateral_filters,
            angular_filters, bi_angular_filters, condensing_kernel)

        # 1x1 convolutions as compatibility transforms.
        ang_output = F.conv2d(ang_output, weight=angular_compat_kernel,
                              stride=self.stride, padding=0)
        bi_output = F.conv2d(bi_output, weight=bi_compat_kernel,
                             stride=self.stride, padding=0)

        pairwise = torch.add(ang_output, bi_output)
        outputs = torch.add(unary, pairwise)
        x = outputs

    return outputs
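# util.angular_filter_kernel is also not shown here. A plausible sketch,
# inferred from the call sites: a per-class Gaussian over (zenith, azimuth)
# grid offsets, packed as an F.conv2d weight. The actual implementation may
# normalize or mask differently.
import numpy as np

def angular_filter_kernel(num_class, size_z, size_a, theta_sqr):
    kernel = np.zeros((num_class, num_class, size_z, size_a),
                      dtype=np.float32)
    center_z, center_a = size_z // 2, size_a // 2
    for z in range(size_z):
        for a in range(size_a):
            # Gaussian in the grid offset from the center tap.
            dist_sqr = (z - center_z)**2 + (a - center_a)**2
            w = np.exp(-dist_sqr / (2.0 * theta_sqr))
            for cls in range(num_class):
                # Diagonal in the class dimensions: each class's marginals
                # are smoothed independently of the others.
                kernel[cls, cls, z, a] = w
    kernel[:, :, center_z, center_a] = 0.0  # no message from a pixel to itself
    return kernel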
def _bilateral_filter_layer(self, layer_name, inputs, thetas=[0.9, 0.01],
                            sizes=[3, 5], stride=1, padding='SAME'):
    """Computes pairwise energy with a bilateral filter for the CRF.

    Args:
      layer_name: layer name.
      inputs: input tensor with shape [batch_size, zenith, azimuth, 3],
          where the last 3 channels are the x, y, z coordinates of a
          lidar point.
      thetas: default theta parameters for the bilateral filter; the
          per-class values in mc.BILATERAL_THETA_R take precedence below.
      sizes: filter sizes for the zenith and azimuth dimensions.
      stride: kernel stride.
      padding: padding strategy.
    Returns:
      out: bilateral filter weights with shape
          [batch_size, zenith, azimuth, sizes[0]*sizes[1]-1, num_class].
          Each [b, z, a, :, cls] contains the filter weights around the
          center position for one class.
    """
    assert padding == 'SAME', 'currently only supports "SAME" padding strategy'
    assert stride == 1, 'currently only supports striding of 1'
    assert sizes[0] % 2 == 1 and sizes[1] % 2 == 1, \
        'currently only supports odd filter sizes'

    mc = self.mc
    theta_a, theta_r = thetas
    size_z, size_a = sizes
    pad_z, pad_a = size_z // 2, size_a // 2
    half_filter_dim = (size_z * size_a) // 2
    batch, zenith, azimuth, in_channel = inputs.shape.as_list()

    with tf.variable_scope(layer_name) as scope:
        # Gather every non-center neighbor's channels into the channel
        # dimension so per-neighbor differences reduce to strided slicing.
        condensing_kernel = tf.constant(
            util.condensing_matrix(size_z, size_a, in_channel),
            dtype=tf.float32,
            name='condensing_kernel')
        condensed_input = tf.nn.conv2d(
            inputs, condensing_kernel, [1, 1, stride, 1],
            padding=padding, name='condensed_input')

        # Per-neighbor coordinate differences for x, y, z.
        diff_x = tf.reshape(
            inputs[:, :, :, 0], [batch, zenith, azimuth, 1]) \
            - condensed_input[:, :, :, 0::in_channel]
        diff_y = tf.reshape(
            inputs[:, :, :, 1], [batch, zenith, azimuth, 1]) \
            - condensed_input[:, :, :, 1::in_channel]
        diff_z = tf.reshape(
            inputs[:, :, :, 2], [batch, zenith, azimuth, 1]) \
            - condensed_input[:, :, :, 2::in_channel]

        # Gaussian bilateral weights, one set per class with its own theta_r.
        bi_filters = []
        for cls in range(mc.NUM_CLASS):
            theta_r = mc.BILATERAL_THETA_R[cls]
            bi_filter = tf.exp(
                -(diff_x**2 + diff_y**2 + diff_z**2) / 2 / theta_r**2)
            bi_filters.append(bi_filter)
        out = tf.transpose(
            tf.stack(bi_filters), [1, 2, 3, 4, 0],
            name='bilateral_filter_weights')

    return out
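# A tiny numeric sanity check of the bilateral weight used above:
# w = exp(-||p_i - p_j||^2 / (2 * theta_r^2)). The points and the theta_r
# value here are made up for illustration.
import numpy as np

p_i = np.array([1.0, 2.0, 0.5])   # hypothetical lidar point (x, y, z)
p_j = np.array([1.2, 2.1, 0.5])   # a nearby neighbor
theta_r = 0.15                    # hypothetical per-class BILATERAL_THETA_R
w = np.exp(-np.sum((p_i - p_j)**2) / (2 * theta_r**2))
print(round(w, 3))  # ~0.329: close points exchange strong messages, while
                    # distant ones are suppressed exponentially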
def _recurrent_crf_layer(self, layer_name, inputs, bilateral_filters,
                         sizes=[3, 5], num_iterations=1, padding='SAME'):
    """Recurrent conditional random field layer. Iterative mean-field
    inference is implemented as a recurrent neural network.

    Args:
      layer_name: layer name.
      inputs: input tensor with shape [batch_size, zenith, azimuth, num_class].
      bilateral_filters: filter weights with shape
          [batch_size, zenith, azimuth, sizes[0]*sizes[1]-1, num_class].
      sizes: size of the local region to be filtered.
      num_iterations: number of mean-field inference iterations.
      padding: padding strategy.
    Returns:
      outputs: tensor with shape [batch_size, zenith, azimuth, num_class].
    """
    assert num_iterations >= 1, 'number of iterations should >= 1'

    mc = self.mc
    with tf.variable_scope(layer_name) as scope:
        # Initialize compatibility matrices as (1 - identity): differing
        # labels at neighboring positions are penalized, identical ones not.
        compat_kernel_init = tf.constant(
            np.reshape(
                np.ones((mc.NUM_CLASS, mc.NUM_CLASS))
                - np.identity(mc.NUM_CLASS),
                [1, 1, mc.NUM_CLASS, mc.NUM_CLASS]),
            dtype=tf.float32)
        bi_compat_kernel = _variable_on_device(
            name='bilateral_compatibility_matrix',
            shape=[1, 1, mc.NUM_CLASS, mc.NUM_CLASS],
            initializer=compat_kernel_init * mc.BI_FILTER_COEF,
            trainable=True)
        self._activation_summary(bi_compat_kernel, 'bilateral_compat_mat')

        angular_compat_kernel = _variable_on_device(
            name='angular_compatibility_matrix',
            shape=[1, 1, mc.NUM_CLASS, mc.NUM_CLASS],
            initializer=compat_kernel_init * mc.ANG_FILTER_COEF,
            trainable=True)
        self._activation_summary(angular_compat_kernel, 'angular_compat_mat')

        self.model_params += [bi_compat_kernel, angular_compat_kernel]

        condensing_kernel = tf.constant(
            util.condensing_matrix(sizes[0], sizes[1], mc.NUM_CLASS),
            dtype=tf.float32,
            name='condensing_kernel')
        angular_filters = tf.constant(
            util.angular_filter_kernel(sizes[0], sizes[1], mc.NUM_CLASS,
                                       mc.ANG_THETA_A**2),
            dtype=tf.float32,
            name='angular_kernel')
        bi_angular_filters = tf.constant(
            util.angular_filter_kernel(sizes[0], sizes[1], mc.NUM_CLASS,
                                       mc.BILATERAL_THETA_A**2),
            dtype=tf.float32,
            name='bi_angular_kernel')

        for it in range(num_iterations):
            # Unary term from the current estimate.
            unary = tf.nn.softmax(
                inputs, dim=-1, name='unary_term_at_iter_{}'.format(it))

            # Message passing with angular and bilateral filters.
            ang_output, bi_output = self._locally_connected_layer(
                'message_passing_iter_{}'.format(it), unary,
                bilateral_filters, angular_filters, bi_angular_filters,
                condensing_kernel, sizes=sizes, padding=padding)

            # 1x1 convolutions as compatibility transforms.
            ang_output = tf.nn.conv2d(
                ang_output, angular_compat_kernel, strides=[1, 1, 1, 1],
                padding='SAME', name='angular_compatibility_transformation')
            self._activation_summary(
                ang_output, 'ang_transfer_iter_{}'.format(it))

            bi_output = tf.nn.conv2d(
                bi_output, bi_compat_kernel, strides=[1, 1, 1, 1],
                padding='SAME', name='bilateral_compatibility_transformation')
            self._activation_summary(
                bi_output, 'bi_transfer_iter_{}'.format(it))

            pairwise = tf.add(ang_output, bi_output,
                              name='pairwise_term_at_iter_{}'.format(it))
            outputs = tf.add(unary, pairwise,
                             name='energy_at_iter_{}'.format(it))
            inputs = outputs

    return outputs
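# _variable_on_device is referenced above but not shown in this excerpt.
# A minimal sketch consistent with how it is called (the initializer can be
# a plain Tensor, in which case tf.get_variable infers the shape from it);
# the actual helper may additionally pin variables to a specific device.
def _variable_on_device(name, shape, initializer, trainable=True):
    if not callable(initializer):
        # Tensor initializer: let tf.get_variable take the shape from it.
        return tf.get_variable(name, initializer=initializer,
                               trainable=trainable)
    return tf.get_variable(name, shape, dtype=tf.float32,
                           initializer=initializer, trainable=trainable)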