def _has_foreground_and_background_in_first_frame(label, subsampling_factor):
    """Tests whether the first frame holds enough foreground and background.

    The first frame of `label` is downscaled with nearest-neighbor resizing to
    (height // subsampling_factor, width // subsampling_factor). Pixels equal
    to 0 are counted as background, every other pixel as foreground.

    Args:
      label: Label tensor of shape [num_frames, height, width, 1].
      subsampling_factor: Integer, the subsampling factor.

    Returns:
      Boolean tensor, True when both the foreground count and the background
      count of the downscaled first frame are at least MIN_LABEL_COUNT.
    """
    height, width = train_utils.resolve_shape(label)[1:3]
    first_frame = label[0, tf.newaxis]
    target_size = [height // subsampling_factor, width // subsampling_factor]
    resized = tf.image.resize_nearest_neighbor(
        first_frame, target_size, align_corners=True)
    first_frame_small = tf.squeeze(resized, axis=0)

    background_mask = tf.equal(first_frame_small, 0)
    foreground_mask = tf.logical_not(background_mask)

    # A plain reduce_any was not robust enough, so require each class to have
    # at least MIN_LABEL_COUNT pixels.
    num_foreground = tf.reduce_sum(tf.cast(foreground_mask, tf.int32))
    num_background = tf.reduce_sum(tf.cast(background_mask, tf.int32))
    enough_foreground = tf.greater_equal(num_foreground, MIN_LABEL_COUNT)
    enough_background = tf.greater_equal(num_background, MIN_LABEL_COUNT)
    return tf.logical_and(enough_foreground, enough_background)
def add_volume_iou_metrics(inputs, outputs):
    """Builds the streaming volume IOU metric for the voxel model.

    Both `inputs['voxels']` and `outputs['voxels_1']` are thresholded at 0.5
    and re-coded to integer class ids before being passed to
    `tf.metrics.mean_iou` with three classes.

    Args:
      inputs: Input dictionary of the voxel generation model; the 'voxels'
        entry is read.
      outputs: Output dictionary returned by the voxel generation model; the
        'voxels_1' entry is read.

    Returns:
      names_to_values: metrics->values (dict) with the key 'volume_iou'.
      names_to_updates: metrics->ops (dict) with the key 'volume_iou'.
    """
    label_mask = tf.greater_equal(inputs['voxels'], 0.5)
    prediction_mask = tf.greater_equal(outputs['voxels_1'], 0.5)

    # Cells at or above the threshold get id 1 in both tensors; the remaining
    # cells get id 2 in the labels and id 3 in the predictions.
    label_ids = 2 - tf.to_int32(label_mask)
    prediction_ids = 3 - tf.to_int32(prediction_mask) * 2

    iou_value, iou_update = tf.metrics.mean_iou(
        labels=label_ids, predictions=prediction_ids, num_classes=3)

    # NOTE(review): the mean over 3 classes is scaled by 3.0, presumably so
    # that only the IOU of the occupied class (id 1) remains -- confirm
    # against the tf.metrics.mean_iou semantics.
    names_to_values = {'volume_iou': iou_value * 3.0}
    names_to_updates = {'volume_iou': iou_update}
    return names_to_values, names_to_updates
def _has_foreground_and_background_in_first_frame_2(label,
                                                    decoder_output_stride):
    """Tests whether the first frame holds enough foreground and background.

    Second variant: the size used for the nearest-neighbor downscaling is
    obtained from `model.scale_dimension` with a scale of
    1.0 / decoder_output_stride, i.e. the actual output dimension.

    Args:
      label: Label tensor of shape [num_frames, height, width, 1].
      decoder_output_stride: Integer, the stride of the decoder output.

    Returns:
      Boolean tensor, True when both the foreground count and the background
      count of the downscaled first frame are at least MIN_LABEL_COUNT.
    """
    height, width = train_utils.resolve_shape(label)[1:3]
    small_height = model.scale_dimension(height, 1.0 / decoder_output_stride)
    small_width = model.scale_dimension(width, 1.0 / decoder_output_stride)

    first_frame = label[0, tf.newaxis]
    resized = tf.image.resize_nearest_neighbor(
        first_frame, [small_height, small_width], align_corners=True)
    first_frame_small = tf.squeeze(resized, axis=0)

    # Label 0 is background; everything else is foreground.
    background_mask = tf.equal(first_frame_small, 0)
    foreground_mask = tf.logical_not(background_mask)

    # A plain reduce_any was not robust enough, so require each class to have
    # at least MIN_LABEL_COUNT pixels.
    num_foreground = tf.reduce_sum(tf.cast(foreground_mask, tf.int32))
    num_background = tf.reduce_sum(tf.cast(background_mask, tf.int32))
    enough_foreground = tf.greater_equal(num_foreground, MIN_LABEL_COUNT)
    enough_background = tf.greater_equal(num_background, MIN_LABEL_COUNT)
    return tf.logical_and(enough_foreground, enough_background)
def getReward_touch(objCoordinates, sampled_locs, numObjsPresented, objSize,
                    batch_size):
    """Builds the "every object was touched" reward for the first example.

    An object counts as touched when at least one glimpse is within `objSize`
    of the object (distance computed by `l2distance`) while being at least
    `objSize` away from the all-zero corner point. The reward is True when
    the number of touched objects equals the number of presented objects.

    Only batch index 0 is evaluated. The loop bounds `maxNumObj` and
    `nGlimpses`, as well as `img_size`, are read from the enclosing scope.

    Args:
      objCoordinates: Tensor indexed as [example, object, :] with the object
        coordinates.
      sampled_locs: Tensor indexed as [example, glimpse, :]; every location is
        passed through `toMnistCoordinates_tf` before distances are taken.
      numObjsPresented: Tensor indexed by example with the number of objects
        presented.
      objSize: Distance threshold used for both the object and corner tests.
      batch_size: Unused; kept so that existing callers keep working.

    Returns:
      Boolean tensor that is True when all presented objects of example 0
      were touched (a scalar if `l2distance` returns scalars).
    """
    corner = tf.zeros((2,), dtype=tf.float32, name=None)

    # Only the first example of the batch is scored.
    example = 0
    obj_coords = objCoordinates[example, :, :]
    glimpse_locs = sampled_locs[example, :, :]
    num_presented = numObjsPresented[example]

    num_touched = 0
    # `range` instead of the Python-2-only `xrange`; both loops are small.
    for obj_idx in range(maxNumObj):
        current_obj = obj_coords[obj_idx, :]
        touch_count = 0
        # Count the glimpses that land on this object.
        for glimpse_idx in range(nGlimpses):
            glimpse = toMnistCoordinates_tf(glimpse_locs[glimpse_idx, :],
                                            img_size)
            dist_to_obj = l2distance(current_obj, glimpse)
            dist_to_corner = l2distance(corner, glimpse)
            on_object = tf.less_equal(dist_to_obj, objSize)
            off_corner = tf.greater_equal(dist_to_corner, objSize)
            # 1 if this glimpse is fixated on the object, 0 otherwise.
            touch_count = touch_count + tf.cast(
                tf.logical_and(on_object, off_corner), tf.int32)
        # The object is touched if at least one glimpse landed on it.
        num_touched = num_touched + tf.cast(
            tf.greater_equal(touch_count, 1), tf.int32)

    # Reward only when every presented object has been touched.
    return tf.equal(num_touched, tf.cast(num_presented, tf.int32))
def prune_completely_outside_window(boxlist, window, scope=None):
    """Drops the boxes that lie entirely outside of `window`.

    Unlike clip_to_window, boxes that only partially overflow the window are
    kept unchanged instead of being clipped. A box is removed when its
    minimum coordinate is at or beyond the window maximum, or its maximum
    coordinate is at or before the window minimum, along either axis.

    Args:
      boxlist: a BoxList holding M_in boxes.
      window: a float tensor of shape [4] representing [ymin, xmin, ymax,
        xmax] of the window.
      scope: name scope.

    Returns:
      pruned_boxlist: a new BoxList with all bounding boxes partially or
        fully in the window.
      valid_indices: a tensor with shape [M_out] indexing the valid bounding
        boxes in the input tensor.
    """
    with tf.name_scope(scope, 'PruneCompleteleyOutsideWindow'):
        box_y_min, box_x_min, box_y_max, box_x_max = tf.split(
            value=boxlist.get(), num_or_size_splits=4, axis=1)
        win_y_min, win_x_min, win_y_max, win_x_max = tf.unstack(window)

        # One column per way a box can miss the window completely.
        past_y_max = tf.greater_equal(box_y_min, win_y_max)
        past_x_max = tf.greater_equal(box_x_min, win_x_max)
        before_y_min = tf.less_equal(box_y_max, win_y_min)
        before_x_min = tf.less_equal(box_x_max, win_x_min)
        violations = tf.concat(
            [past_y_max, past_x_max, before_y_min, before_x_min], 1)

        is_outside = tf.reduce_any(violations, 1)
        kept_rows = tf.where(tf.logical_not(is_outside))
        valid_indices = tf.reshape(kept_rows, [-1])
        return gather(boxlist, valid_indices), valid_indices
def add_dyprune(weights):
    """Creates a pruning mask for `weights` and returns the masked weights.

    A non-trainable mask variable initialised to ones is created next to
    `weights`. Two magnitude thresholds are derived from the masked weight
    statistics and the per-variable rate `config.crate[...]`, and an assign
    op that would write the thresholded indicator into the mask is built.

    Args:
      weights: Weight variable; its name without the last two characters is
        used as the key into `config.crate` and as the mask name prefix.

    Returns:
      The elementwise product of `weights` and the mask variable.
    """
    base_name = weights.name[:-2]
    crate = config.crate[base_name]  # hyperparameter: C rate
    prune_mask = tf.Variable(
        tf.ones_like(weights), name=base_name + 'mask', trainable=False)

    # Mean absolute value over the entries selected by the mask.
    masked_abs_sum = tf.reduce_sum(tf.multiply(tf.abs(weights), prune_mask))
    mean = tf.divide(masked_abs_sum, tf.reduce_sum(prune_mask))

    # Spread of the masked weights: sqrt(sum(w^2)/n - mean^2).
    masked_squares = tf.square(tf.multiply(weights, prune_mask))
    mean_sq_total = tf.square(mean) * tf.reduce_sum(prune_mask)
    spread = tf.reduce_sum(masked_squares) - mean_sq_total
    spread = tf.sqrt(tf.divide(spread, tf.reduce_sum(prune_mask)))

    lower_threshold = (mean + spread * crate) * 0.25  # hyperparameter a
    upper_threshold = (mean + spread * crate) * 0.45  # hyperparameter b

    above_lower = tf.greater_equal(
        tf.abs(weights), tf.ones_like(weights) * lower_threshold)
    above_upper = tf.greater_equal(
        tf.abs(weights), tf.ones_like(weights) * upper_threshold)

    # NOTE(review): `prune_mask >= 0` holds for every entry of a 0/1 mask;
    # a strict `>` may have been intended -- confirm.
    keep = tf.greater_equal(prune_mask, tf.zeros_like(weights))
    keep = tf.logical_and(keep, above_lower)
    keep = tf.logical_or(keep, above_upper)
    new_mask = tf.to_float(keep)

    # NOTE(review): this assign op is created but neither returned nor made a
    # dependency of the result, so nothing visible here ever runs it --
    # confirm whether it is fetched elsewhere.
    update = prune_mask.assign(new_mask)

    return tf.multiply(weights, prune_mask)
def build_graph(self, nn_im_w, nn_im_h, num_colour_channels=3, weights=None,
                biases=None):
    """Builds the network graph, its metrics, and initialises all variables.

    The graph consists of three convolutional layers, one fully connected
    layer, dropout and a single-output readout layer. Training minimises the
    root of the mean squared difference between target and output with Adam
    (learning rate 1e-4). Confusion-matrix counts are derived by thresholding
    the target at 0.5 and the output at `self.threshold`.

    Args:
      nn_im_w: Width of the network input image.
      nn_im_h: Height of the network input image.
      num_colour_channels: Number of channels of the input image. The flat
        input placeholder is sized with this value so that it agrees with
        the reshape that feeds the first convolution.
      weights: Optional list of five initial weight values (conv1, conv2,
        conv3, fully connected, readout); None entries are passed through to
        the layer builders.
      biases: Optional list of five initial bias values, same layout as
        `weights`.
    """
    num_outputs = 1
    self.nn_im_w = nn_im_w
    self.nn_im_h = nn_im_h
    if weights is None:
        weights = [None] * 5
    if biases is None:
        biases = [None] * 5

    with tf.device('/cpu:0'):
        # Placeholders for the flattened input image, the target output and
        # the classification threshold.
        self.x = tf.placeholder(
            tf.float32,
            shape=[None, nn_im_w * nn_im_h * num_colour_channels])
        self.y_ = tf.placeholder(tf.float32, shape=[None, num_outputs])
        self.threshold = tf.placeholder(tf.float32)

        # Convolutional and pooling layers.
        conv1_output_channels = 32
        conv2_output_channels = 16
        conv3_output_channels = 8
        conv_layer_1_input = tf.reshape(
            self.x, [-1, nn_im_h, nn_im_w, num_colour_channels])
        # Each builder call is expected to add its layer to self.layers,
        # which is read back below -- TODO confirm in build_conv_layer.
        self.build_conv_layer(
            conv_layer_1_input, num_colour_channels, conv1_output_channels,
            initial_weights=weights[0], initial_biases=biases[0])
        self.build_conv_layer(
            self.layers[0][0], conv1_output_channels, conv2_output_channels,
            initial_weights=weights[1], initial_biases=biases[1])
        self.build_conv_layer(
            self.layers[1][0], conv2_output_channels, conv3_output_channels,
            initial_weights=weights[2], initial_biases=biases[2])

        # Fully connected layer.
        # NOTE(review): the // 8 assumes every conv layer halves the spatial
        # size -- confirm against build_conv_layer.
        convnet_output_w = nn_im_w // 8
        convnet_output_h = nn_im_h // 8
        fully_connected_layer_input = tf.reshape(
            self.layers[2][0],
            [-1, convnet_output_w * convnet_output_h * conv3_output_channels])
        self.build_fully_connected_layer(
            fully_connected_layer_input, convnet_output_w, convnet_output_h,
            conv3_output_channels,
            initial_weights=weights[3], initial_biases=biases[3])

        # Dropout stage and readout layer.
        self.keep_prob, self.h_drop = self.dropout(self.layers[3][0])
        self.y_conv, _, _ = self.build_readout_layer(
            self.h_drop, num_outputs,
            initial_weights=weights[4], initial_biases=biases[4])

        # Root-mean-square error drives the optimiser.
        self.mean_error = tf.sqrt(
            tf.reduce_mean(tf.square(self.y_ - self.y_conv)))
        self.train_step = tf.train.AdamOptimizer(1e-4).minimize(
            self.mean_error)

        # Accuracy compares the target with the rounded output.
        self.accuracy = (
            1.0 - tf.reduce_mean(tf.abs(self.y_ - tf.round(self.y_conv))))

        positive_examples = tf.greater_equal(self.y_, 0.5)
        negative_examples = tf.logical_not(positive_examples)
        positive_classifications = tf.greater_equal(
            self.y_conv, self.threshold)
        negative_classifications = tf.logical_not(positive_classifications)

        # Positive examples classified as positive.
        self.true_positive = tf.reduce_sum(tf.cast(
            tf.logical_and(positive_examples, positive_classifications),
            tf.int32))
        # Negative examples classified as positive.
        self.false_positive = tf.reduce_sum(tf.cast(
            tf.logical_and(negative_examples, positive_classifications),
            tf.int32))
        # Negative examples classified as negative.
        self.true_negative = tf.reduce_sum(tf.cast(
            tf.logical_and(negative_examples, negative_classifications),
            tf.int32))
        # Positive examples classified as negative.
        self.false_negative = tf.reduce_sum(tf.cast(
            tf.logical_and(positive_examples, negative_classifications),
            tf.int32))
        self.positive_count = tf.reduce_sum(
            tf.cast(positive_examples, tf.int32))
        self.negative_count = tf.reduce_sum(
            tf.cast(negative_examples, tf.int32))

        # Laid out as [[TP, FP], [FN, TN]].
        self.confusion_matrix = tf.reshape(
            tf.pack([self.true_positive, self.false_positive,
                     self.false_negative, self.true_negative]),
            [2, 2])

        self.sess.run(tf.initialize_all_variables())
def pad_to_bounding_box(image, offset_height, offset_width, target_height,
                        target_width, pad_value):
    """Pads `image` up to the target size using `pad_value`.

    Works like tf.image.pad_to_bounding_box, except that the padded area is
    filled with an arbitrary `pad_value` and the image size does not have to
    be known during graph construction. The value is realised by subtracting
    `pad_value`, zero-padding, and adding `pad_value` back.

    Args:
      image: 3-D tensor with shape [height, width, channels].
      offset_height: Number of padded rows to add on top.
      offset_width: Number of padded columns to add on the left.
      target_height: Height of output image.
      target_width: Width of output image.
      pad_value: Value to pad the image tensor with.

    Returns:
      3-D tensor of shape [target_height, target_width, channels].

    Note:
      The rank, target size and offset checks are graph assertions
      (tf.Assert), so violations surface when the graph is run rather than
      when this function is called.
    """
    rank = tf.rank(image)
    rank_check = tf.Assert(
        tf.equal(rank, 3),
        ['Wrong image tensor rank [Expected] [Actual]', 3, rank])
    with tf.control_dependencies([rank_check]):
        # Shift the image so that zero padding corresponds to pad_value.
        image -= pad_value

    dynamic_shape = tf.shape(image)
    height, width = dynamic_shape[0], dynamic_shape[1]

    width_check = tf.Assert(
        tf.greater_equal(target_width, width),
        ['target_width must be >= width'])
    height_check = tf.Assert(
        tf.greater_equal(target_height, height),
        ['target_height must be >= height'])

    # Padding that remains on the right/bottom once the offsets are applied.
    with tf.control_dependencies([width_check]):
        pad_right = target_width - offset_width - width
    with tf.control_dependencies([height_check]):
        pad_bottom = target_height - offset_height - height

    offsets_check = tf.Assert(
        tf.logical_and(
            tf.greater_equal(pad_right, 0),
            tf.greater_equal(pad_bottom, 0)),
        ['target size not possible with the given target offsets'])

    row_padding = tf.stack([offset_height, pad_bottom])
    column_padding = tf.stack([offset_width, pad_right])
    channel_padding = tf.stack([0, 0])
    with tf.control_dependencies([offsets_check]):
        paddings = tf.stack([row_padding, column_padding, channel_padding])

    zero_padded = tf.pad(image, paddings)
    # Undo the shift so that the padded area ends up equal to pad_value.
    return zero_padded + pad_value
def distort_image(image, input_width, input_height, output_side):
    """Randomly distorts `image` into an output_side x output_side x 3 image.

    If both input dimensions are at least output_side + 10, the image is
    resized to output_side + 10 with `resize_bl` and randomly cropped;
    otherwise it is resized directly to output_side. The result is randomly
    flipped left/right, colour-jittered in one of two randomly chosen
    orders, and clipped to [0, 1].

    Args:
      image: Input image tensor.
      input_width: Width of the input image, compared against an int64
        constant.
      input_height: Height of the input image, compared against an int64
        constant.
      output_side: Side length of the square output image.

    Returns:
      The distorted image tensor with values clipped to [0.0, 1.0].
    """
    def random_crop_it():
        """Resizes to output_side + 10 and takes a random crop."""
        enlarged = resize_bl(image, output_side + 10)
        return tf.random_crop(enlarged, [output_side, output_side, 3])

    def resize_it():
        """Resizes straight to output_side with resize_bl."""
        return resize_bl(image, output_side)

    # Crop only when the input is large enough in both dimensions;
    # otherwise fall back to a plain resize.
    increased_output_side = tf.constant(output_side + 10, dtype=tf.int64)
    wide_enough = tf.greater_equal(input_width, increased_output_side)
    tall_enough = tf.greater_equal(input_height, increased_output_side)
    image = tf.cond(
        tf.logical_and(wide_enough, tall_enough), random_crop_it, resize_it)

    flipped_image = tf.image.random_flip_left_right(image)

    def brighten(img):
        return tf.image.random_brightness(img, max_delta=32. / 255.)

    def saturate_then_hue(img):
        img = tf.image.random_saturation(img, lower=0.5, upper=1.5)
        return tf.image.random_hue(img, max_delta=0.2)

    def contrast(img):
        return tf.image.random_contrast(img, lower=0.5, upper=1.5)

    def fn1():
        """Applies random brightness, saturation, hue, contrast."""
        return contrast(saturate_then_hue(brighten(flipped_image)))

    def fn2():
        """Applies random brightness, contrast, saturation, hue."""
        return saturate_then_hue(contrast(brighten(flipped_image)))

    # Pick one of the two distortion orders with equal probability.
    p_order = tf.random_uniform(
        shape=[], minval=0.0, maxval=1.0, dtype=tf.float32)
    jittered = tf.cond(tf.less(p_order, 0.5), fn1, fn2)
    return tf.clip_by_value(jittered, 0.0, 1.0)
def optimOp():
    """Builds the conditional op that applies the gradient updates.

    `self`, `optim`, `u` and `varlist` come from the enclosing scope. The
    update is gated on a step counter: `stats_step >= _stats_accum_iter`
    when `_full_stats_init` is set, otherwise `sgd_step >= _cold_iter`.
    With `_full_stats_init` set the update is additionally gated on
    `factor_step > 0`. Whenever a gate is closed a no-op is run instead.
    """
    def applyUpdates():
        return optim.apply_gradients(list(zip(u, varlist)))

    def updateOptimOp():
        if not self._full_stats_init:
            return applyUpdates()
        factors_ready = tf.greater(self.factor_step, tf.convert_to_tensor(0))
        return tf.cond(factors_ready, applyUpdates, tf.no_op)

    if self._full_stats_init:
        gate_open = tf.greater_equal(self.stats_step, self._stats_accum_iter)
    else:
        gate_open = tf.greater_equal(self.sgd_step, self._cold_iter)
    return tf.cond(gate_open, updateOptimOp, tf.no_op)
def _extract_proposal_features(self, preprocessed_inputs, scope):
    """Extracts first stage RPN features.

    Runs `self._resnet_model` on the inputs and returns the activation
    stored under '<scope>/<architecture>/block3'.

    Args:
      preprocessed_inputs: A [batch, height, width, channels] float32 tensor
        representing a batch of images.
      scope: A scope name.

    Returns:
      rpn_feature_map: The 'block3' activation tensor.
      activations: A dictionary mapping feature extractor tensor names to
        tensors.

    Raises:
      ValueError: If `preprocessed_inputs` is not statically 4 dimensional.
      InvalidArgumentError: At run time, if the height or width of
        `preprocessed_inputs` is less than 33.
    """
    if len(preprocessed_inputs.get_shape().as_list()) != 4:
        raise ValueError('`preprocessed_inputs` must be 4 dimensional, got a '
                         'tensor of shape %s' %
                         preprocessed_inputs.get_shape())

    height_ok = tf.greater_equal(tf.shape(preprocessed_inputs)[1], 33)
    width_ok = tf.greater_equal(tf.shape(preprocessed_inputs)[2], 33)
    shape_assert = tf.Assert(
        tf.logical_and(height_ok, width_ok),
        ['image size must at least be 33 in both height and width.'])

    with tf.control_dependencies([shape_assert]):
        # Disables batchnorm for fine-tuning with smaller batch sizes.
        # TODO(chensun): Figure out if it is needed when image
        # batch size is bigger.
        resnet_scope = resnet_utils.resnet_arg_scope(
            batch_norm_epsilon=1e-5,
            batch_norm_scale=True,
            weight_decay=self._weight_decay)
        with slim.arg_scope(resnet_scope):
            with tf.variable_scope(
                    self._architecture,
                    reuse=self._reuse_weights) as var_scope:
                _, activations = self._resnet_model(
                    preprocessed_inputs,
                    num_classes=None,
                    is_training=self._train_batch_norm,
                    global_pool=False,
                    output_stride=self._first_stage_features_stride,
                    spatial_squeeze=False,
                    scope=var_scope)

    feature_key = scope + '/%s/block3' % self._architecture
    return activations[feature_key], activations
def _crop(image, offset_height, offset_width, crop_height, crop_width):
    """Crops `image` to the requested size at the given offsets.

    The image size does not need to be known statically, but the image rank
    does.

    Args:
      image: an image of shape [height, width, channels].
      offset_height: a scalar tensor indicating the height offset.
      offset_width: a scalar tensor indicating the width offset.
      crop_height: the height of the cropped image.
      crop_width: the width of the cropped image.

    Returns:
      The cropped image, with static shape
      [crop_height, crop_width, channels].

    Raises:
      ValueError: if `image` doesn't have a static rank of 3.
      InvalidArgumentError: at run time, if the rank is not 3 or if the image
        dimensions are less than the crop size.
    """
    original_shape = tf.shape(image)

    static_dims = image.get_shape().as_list()
    if len(static_dims) != 3:
        raise ValueError('input must have rank of 3')
    original_channels = static_dims[2]

    rank_assertion = tf.Assert(
        tf.equal(tf.rank(image), 3),
        ['Rank of image must be equal to 3.'])
    with tf.control_dependencies([rank_assertion]):
        cropped_shape = tf.stack(
            [crop_height, crop_width, original_shape[2]])

    fits_height = tf.greater_equal(original_shape[0], crop_height)
    fits_width = tf.greater_equal(original_shape[1], crop_width)
    size_assertion = tf.Assert(
        tf.logical_and(fits_height, fits_width),
        ['Crop size greater than the image size.'])

    offsets = tf.to_int32(tf.stack([offset_height, offset_width, 0]))

    # tf.slice is used rather than crop_to_bounding_box because it accepts
    # tensors for the crop size.
    with tf.control_dependencies([size_assertion]):
        image = tf.slice(image, offsets, cropped_shape)
    image = tf.reshape(image, cropped_shape)
    image.set_shape([crop_height, crop_width, original_channels])
    return image
def testSomeUnweightedExamples(self):
    """Trains logistic SDCA on four examples, two of which have weight 0.

    The data is set up so that it should produce the same results as
    testSimple: only the examples at index 0 and 2 carry a non-zero weight.
    """
    # (age, gender, label) per example; rows 1 and 3 are weighted 0.0 below.
    rows = [
        (0, 0, 0),  # Will be used.
        (1, 0, 0),  # Will be ignored.
        (1, 1, 1),  # Will be used.
        (1, 0, 1),  # Will be ignored.
    ]
    example_protos = [
        make_example_proto({"age": [age], "gender": [gender]}, label)
        for age, gender, label in rows
    ]
    example_weights = [1.0, 0.0, 1.0, 0.0]

    with self._single_threaded_test_session():
        examples = make_example_dict(example_protos, example_weights)
        variables = make_variable_dict(1, 1)
        options = {
            "symmetric_l2_regularization": 1,
            "symmetric_l1_regularization": 0,
            "loss_type": "logistic_loss",
        }
        tf.initialize_all_variables().run()

        model = SdcaModel(CONTAINER, examples, variables, options)
        unregularized_loss = model.unregularized_loss(examples)
        regularized_loss = model.regularized_loss(examples)
        prediction = model.predictions(examples)
        model.minimize().run()

        self.assertAllClose(0.395226, unregularized_loss.eval(),
                            rtol=3e-2, atol=3e-2)
        self.assertAllClose(0.657446, regularized_loss.eval(),
                            rtol=3e-2, atol=3e-2)

        # Threshold the predictions at 0.5 to obtain hard labels.
        cutoff = tf.ones_like(prediction) * 0.5
        predicted_labels = tf.cast(
            tf.greater_equal(prediction, cutoff), tf.float32)
        self.assertAllClose([0, 1, 1, 1], predicted_labels.eval())
def prune_non_overlapping_boxes(
        boxlist1, boxlist2, min_overlap=0.0, scope=None):
    """Prunes the boxes in boxlist1 that overlap too little with boxlist2.

    A box of boxlist1 is kept when its IOA with at least one box of boxlist2
    is greater than or equal to `min_overlap`; otherwise it is removed.

    Args:
      boxlist1: BoxList holding N boxes.
      boxlist2: BoxList holding M boxes.
      min_overlap: Minimum required overlap between boxes, to count them as
        overlapping.
      scope: name scope.

    Returns:
      new_boxlist1: A pruned boxlist with size [N', 4].
      keep_inds: A tensor with shape [N'] indexing kept bounding boxes in the
        first input BoxList `boxlist1`.
    """
    with tf.name_scope(scope, 'PruneNonOverlappingBoxes'):
        pairwise_ioa = ioa(boxlist2, boxlist1)  # [M, N] tensor
        # Best overlap of every boxlist1 box with any boxlist2 box: [N].
        best_ioa = tf.reduce_max(pairwise_ioa, reduction_indices=[0])
        overlaps_enough = tf.greater_equal(
            best_ioa, tf.constant(min_overlap))
        keep_inds = tf.squeeze(tf.where(overlaps_enough), squeeze_dims=[1])
        new_boxlist1 = gather(boxlist1, keep_inds)
        return new_boxlist1, keep_inds
def make_optimizer(loss, variables, name='Adam'):
    """Builds an Adam training op with a constant-then-linear-decay rate.

    The learning rate stays at `self.learning_rate` for the first 100k steps
    and then decays linearly to zero over the next 100k steps. `self` is not
    a parameter here; it is read from the enclosing scope.

    Args:
      loss: Loss tensor to minimise.
      variables: List of variables to optimise.
      name: Name of the optimizer, also used in the learning-rate summary
        tag.

    Returns:
      The op that performs one optimisation step and increments the
      step counter created here.
    """
    global_step = tf.Variable(0, trainable=False)
    initial_rate = self.learning_rate
    final_rate = 0.0
    start_decay_step = 100000
    decay_steps = 100000
    beta1 = self.beta1

    decay_started = tf.greater_equal(global_step, start_decay_step)
    decayed_rate = tf.train.polynomial_decay(
        initial_rate, global_step - start_decay_step,
        decay_steps, final_rate, power=1.0)
    learning_rate = tf.where(decay_started, decayed_rate, initial_rate)
    tf.summary.scalar('learning_rate/{}'.format(name), learning_rate)

    optimizer = tf.train.AdamOptimizer(learning_rate, beta1=beta1, name=name)
    return optimizer.minimize(
        loss, global_step=global_step, var_list=variables)
def testImbalanced(self):
    """Trains logistic SDCA on 1 positive and 3 negative examples."""
    # (age, gender, label) per example.
    rows = [
        (0, 0, 0),
        (2, 0, 0),
        (3, 0, 0),
        (1, 1, 1),
    ]
    example_protos = [
        make_example_proto({"age": [age], "gender": [gender]}, label)
        for age, gender, label in rows
    ]
    example_weights = [1.0, 1.0, 1.0, 1.0]

    with self._single_threaded_test_session():
        examples = make_example_dict(example_protos, example_weights)
        variables = make_variable_dict(3, 1)
        options = {
            "symmetric_l2_regularization": 1,
            "symmetric_l1_regularization": 0,
            "loss_type": "logistic_loss",
            "prior": -1.09861,
        }
        tf.initialize_all_variables().run()

        model = SdcaModel(CONTAINER, examples, variables, options)
        unregularized_loss = model.unregularized_loss(examples)
        regularized_loss = model.regularized_loss(examples)
        prediction = model.predictions(examples)
        model.minimize().run()

        self.assertAllClose(0.331710, unregularized_loss.eval(),
                            rtol=3e-2, atol=3e-2)
        self.assertAllClose(0.591295, regularized_loss.eval(),
                            rtol=3e-2, atol=3e-2)

        # Threshold the predictions at 0.5 to obtain hard labels.
        cutoff = tf.ones_like(prediction) * 0.5
        predicted_labels = tf.cast(
            tf.greater_equal(prediction, cutoff), tf.float32)
        self.assertAllEqual([0, 0, 0, 1], predicted_labels.eval())
def testOneOpCond(self):
    """Checks tf.cond when each branch consists of a single assign op."""
    with self.test_session():
        target = tf.Variable(0)
        fed_value = tf.convert_to_tensor(0)
        one = tf.convert_to_tensor(1)
        two = tf.convert_to_tensor(2)
        at_least_one = tf.greater_equal(fed_value, 1)

        result = tf.cond(
            at_least_one,
            lambda: tf.assign(target, one),
            lambda: tf.assign(target, two))
        self.assertTrue(isinstance(result, tf.Tensor))
        tf.initialize_all_variables().run()

        self.assertEqual(0, target.eval())

        # True case: a fed value of 2 is >= 1, so the variable is set to 1.
        self.assertEqual(1, result.eval(feed_dict={fed_value.name: 2}))
        self.assertEqual(1, target.eval())

        # False case: a fed value of 0 is not >= 1, so it is set to 2.
        self.assertEqual(2, result.eval(feed_dict={fed_value.name: 0}))
        self.assertEqual(2, target.eval())
def encoder_body(time, old_state, output_ta_t):
    """Runs one gated recurrent step of the encoder loop.

    The input for `time` is read from `input_ta`, a GRU-style update is
    computed from it and `old_state`, and the new state is written to
    `output_ta_t`. The state carried forward is the new state where the
    sequence is active at `time` and the old state elsewhere. Weights,
    `reverse`, `lengths`, `max_sequence_length` and `min_sequence_length`
    come from the enclosing scope.

    Args:
      time: Current loop step.
      old_state: State carried over from the previous step.
      output_ta_t: TensorArray collecting the new state of every step.

    Returns:
      Tuple (time + 1, carried state, updated output TensorArray).
    """
    x_t = input_ta.read(time)

    gate_input = tf.concat(1, [x_t, old_state])
    update_gate = tf.sigmoid(tf.matmul(gate_input, W_z) + b_z)
    reset_gate = tf.sigmoid(tf.matmul(gate_input, W_r) + b_r)

    candidate_input = tf.concat(1, [x_t, reset_gate * old_state])
    candidate = tf.tanh(tf.matmul(candidate_input, W_h) + b_h)

    new_state = (1 - update_gate) * candidate + update_gate * old_state
    output_ta_t = output_ta_t.write(time, new_state)

    def updateall():
        # The whole batch takes the new state.
        return new_state

    def updatesome():
        # Selects per element between the new and the old state.
        if reverse:
            active = tf.greater_equal(time, max_sequence_length - lengths)
        else:
            active = tf.less(time, lengths)
        return tf.select(active, new_state, old_state)

    # While `time` is within the shortest sequence no selection is needed.
    if reverse:
        all_active = tf.greater_equal(
            time, max_sequence_length - min_sequence_length)
    else:
        all_active = tf.less(time, min_sequence_length)
    state = tf.cond(all_active, updateall, updatesome)

    return (time + 1, state, output_ta_t)
def drawGraph(self, n_row, n_latent, n_col):
    """Builds the matrix-decomposition graph inside the 'matDecomp' scope.

    The model is `index @ A @ B` with A of shape [n_row, n_latent] and B of
    shape [n_latent, n_col]. The loss is the mean of the squared difference
    to `_p` weighted by `_c`, plus `_lambda` times the mean squared value of
    all entries of A and B.

    Args:
      n_row: Number of rows of the decomposed matrix.
      n_latent: Number of latent dimensions.
      n_col: Number of columns of the decomposed matrix.
    """
    with tf.name_scope('matDecomp'):
        # Placeholders: target values, per-entry weights, regularisation
        # strength and row selector.
        self._p = tf.placeholder(tf.float32, shape=[None, n_col])
        self._c = tf.placeholder(tf.float32, shape=[None, n_col])
        self._lambda = tf.placeholder(tf.float32)
        self._index = tf.placeholder(tf.float32, shape=[None, n_row])

        self._A = tf.Variable(tf.truncated_normal([n_row, n_latent]))
        self._B = tf.Variable(tf.truncated_normal([n_latent, n_col]))
        self._h = tf.matmul(tf.matmul(self._index, self._A), self._B)

        squared_error = tf.squared_difference(self._p, self._h)
        weighted_loss = tf.reduce_mean(tf.mul(self._c, squared_error))
        self._weighted_loss = weighted_loss

        # L2 penalty averaged over the number of parameters.
        l2_A = tf.reduce_sum(tf.square(self._A))
        l2_B = tf.reduce_sum(tf.square(self._B))
        num_params = tf.constant(
            n_row * n_latent + n_latent * n_col, tf.float32)
        l2 = tf.truediv(tf.add(l2_A, l2_B), num_params)
        reg_term = tf.mul(self._lambda, l2)
        self._loss = tf.add(weighted_loss, reg_term)

        self._mask = tf.placeholder(tf.float32, shape=[n_row, n_col])
        one = tf.constant(1, tf.float32)
        # Entries of A @ B that are >= 1 are treated as a prediction of 1.
        pred = tf.cast(
            tf.greater_equal(tf.matmul(self._A, self._B), one), tf.float32)
        # Weighted agreement between prediction and target, summed over the
        # masked entries.
        agreement = tf.mul(
            tf.cast(tf.equal(pred, self._p), tf.float32), self._c)
        self._vali_err = tf.reduce_sum(tf.mul(agreement, self._mask))

        # Only variables whose name contains 'matDecomp' are saved.
        self._saver = tf.train.Saver(
            [v for v in tf.all_variables()
             if v.name.find('matDecomp') != -1])

        tf.scalar_summary('training_weighted_loss_l2', self._loss)
        tf.scalar_summary('validation_weighted_loss', self._weighted_loss)
        # The merged summary op is created but not stored here.
        tf.merge_all_summaries()
def testSimpleLogistic(self):
    """Trains logistic SDCA on one negative and one positive example."""
    # (age, gender, label) per example.
    rows = [
        (0, 0, 0),
        (1, 1, 1),
    ]
    example_protos = [
        make_example_proto({'age': [age], 'gender': [gender]}, label)
        for age, gender, label in rows
    ]
    example_weights = [1.0, 1.0]

    with self._single_threaded_test_session():
        examples = make_example_dict(example_protos, example_weights)
        variables = make_variable_dict(1, 1)
        options = {
            'symmetric_l2_regularization': 0.5,
            'symmetric_l1_regularization': 0,
            'loss_type': 'logistic_loss',
            'prior': 0.0,
        }
        tf.initialize_all_variables().run()

        model = SdcaModel(CONTAINER, examples, variables, options)
        unregularized_loss = model.unregularized_loss(examples)
        regularized_loss = model.regularized_loss(examples)
        prediction = model.predictions(examples)

        # Losses before any optimisation step has been run.
        self.assertAllClose(0.693147, unregularized_loss.eval())
        self.assertAllClose(0.693147, regularized_loss.eval())

        model.minimize().run()
        self.assertAllClose(0.395226, unregularized_loss.eval(),
                            rtol=3e-2, atol=3e-2)
        self.assertAllClose(0.657446, regularized_loss.eval(),
                            rtol=3e-2, atol=3e-2)

        # Threshold the predictions at 0.5 to obtain hard labels.
        cutoff = tf.ones_like(prediction) * 0.5
        predicted_labels = tf.cast(
            tf.greater_equal(prediction, cutoff), tf.float32)
        self.assertAllEqual([0, 1], predicted_labels.eval())
def _verify_compatible_image_shapes(img1, img2):
    """Checks if two image tensors are compatible for applying SSIM or PSNR.

    Both inputs must have rank at least 3 and matching last three
    dimensions. The check is done statically where the shapes are known and
    is repeated as run-time assertions on the dynamic shapes.

    Args:
      img1: Tensor containing the first image batch.
      img2: Tensor containing the second image batch.

    Returns:
      A tuple containing: the first tensor shape, the second tensor shape,
      and a list of control_flow_ops.Assert() ops implementing the checks.

    Raises:
      ValueError: When static shape check fails.
    """
    static1 = img1.get_shape().with_rank_at_least(3)
    static2 = img2.get_shape().with_rank_at_least(3)
    static1[-3:].assert_is_compatible_with(static2[-3:])

    if static1.ndims is not None and static2.ndims is not None:
        # Leading (batch) dimensions are compared from the innermost one
        # outwards; a size of 1 on either side is accepted.
        leading_pairs = zip(reversed(static1[:-3]), reversed(static2[:-3]))
        for dim1, dim2 in leading_pairs:
            if not (dim1 == 1 or dim2 == 1 or dim1.is_compatible_with(dim2)):
                raise ValueError('Two images are not compatible: %s and %s' %
                                 (static1, static2))

    # From here on work with the dynamic shape tensors.
    shape1, shape2 = tf.shape_n([img1, img2])

    # TODO(sjhwang): Check if shape1[:-3] and shape2[:-3] are broadcastable.
    rank_check = tf.Assert(
        tf.greater_equal(tf.size(shape1), 3), [shape1, shape2], summarize=10)
    trailing_dims_check = tf.Assert(
        tf.reduce_all(tf.equal(shape1[-3:], shape2[-3:])),
        [shape1, shape2], summarize=10)
    return shape1, shape2, [rank_check, trailing_dims_check]
def loop_body(i, a, sample, log_p):
    """Accumulates hidden state, sample, and log probability for index i.

    `w_enc_arr`, `w_dec_arr`, `b_dec_arr`, `temperature` and `self` come
    from the enclosing scope.

    Args:
      i: Index of the current step.
      a: Accumulated hidden pre-activation.
      sample: List of the values chosen so far.
      log_p: Accumulated log probability.

    Returns:
      Tuple (updated hidden state, sample list extended by the new value,
      updated log probability).
    """
    # Parameters for this step.
    enc_weights = w_enc_arr[i]
    dec_weights = w_dec_arr[i]
    dec_bias = b_dec_arr[i]

    cond_p, cond_logits = self._cond_prob(a, dec_weights, dec_bias)

    if temperature is None:
        # Without a temperature, threshold the probability at 0.5.
        value = tf.to_float(tf.greater_equal(cond_p, 0.5))
    else:
        # Otherwise draw from a Bernoulli with temperature-scaled logits.
        value = tfp.distributions.Bernoulli(
            logits=cond_logits / temperature, dtype=tf.float32).sample()

    # Log space avoids numerical issues.
    step_log_p = (value * _safe_log(cond_p)
                  + (1 - value) * _safe_log(1 - cond_p))

    # Encode the chosen value and add it to the hidden units.
    hidden = a + tf.matmul(value, enc_weights)
    return hidden, sample + [value], log_p + step_log_p
def matched_column_indicator(self):
    """Returns an indicator of which columns are matched.

    Returns:
      A boolean tensor with the same shape as `self._match_results` that is
      True wherever the stored match result is greater than or equal to 0.
    """
    match_results = self._match_results
    is_matched = tf.greater_equal(match_results, 0)
    return is_matched
def _get_values_from_start_and_end(self, input_tensor, num_start_samples,
                                   num_end_samples, total_num_samples):
    """Slices the first num_start_samples and last num_end_samples values.

    Args:
      input_tensor: An int32 tensor of shape [N] to be sliced.
      num_start_samples: Number of examples to be sliced from the beginning
        of the input tensor.
      num_end_samples: Number of examples to be sliced from the end of the
        input tensor.
      total_num_samples: Sum of num_start_samples and num_end_samples. This
        should be a scalar.

    Returns:
      A tensor containing the first num_start_samples and last
      num_end_samples from input_tensor.
    """
    input_length = tf.shape(input_tensor)[0]
    in_head = tf.less(tf.range(input_length), num_start_samples)
    in_tail = tf.greater_equal(
        tf.range(input_length), input_length - num_end_samples)
    selected = tf.cast(tf.logical_or(in_head, in_tail), tf.int32)

    # The running count gives every selected position its 1-based output
    # slot; multiplying by the mask zeroes the unselected positions.
    output_slot = tf.multiply(tf.cumsum(selected), selected)

    # After the shift unselected positions have index -1, which one_hot
    # turns into an all-zero row, so they do not contribute below.
    selector = tf.one_hot(
        output_slot - 1, total_num_samples, dtype=tf.int32)
    return tf.tensordot(input_tensor, selector, axes=[0, 0])
def prune_small_boxes(boxlist, min_side, scope=None):
    """Prunes the boxes that have a side smaller than min_side.

    Args:
      boxlist: BoxList holding N boxes.
      min_side: Minimum width AND height of box to survive pruning.
      scope: name scope.

    Returns:
      A pruned boxlist.
    """
    with tf.name_scope(scope, 'PruneSmallBoxes'):
        height, width = height_width(boxlist)
        wide_enough = tf.greater_equal(width, min_side)
        tall_enough = tf.greater_equal(height, min_side)
        keep = tf.logical_and(wide_enough, tall_enough)
        keep_indices = tf.reshape(tf.where(keep), [-1])
        return gather(boxlist, keep_indices)
def testImbalancedWithExampleWeights(self):
    """Trains logistic SDCA on unevenly weighted examples.

    One negative example with weight 3.0 and one positive example with
    weight 1.0 are used.
    """
    # (age, gender, label) per example.
    rows = [
        (0, 0, 0),
        (1, 1, 1),
    ]
    example_protos = [
        make_example_proto({'age': [age], 'gender': [gender]}, label)
        for age, gender, label in rows
    ]
    example_weights = [3.0, 1.0]

    with self._single_threaded_test_session():
        examples = make_example_dict(example_protos, example_weights)
        variables = make_variable_dict(1, 1)
        options = {
            'symmetric_l2_regularization': 0.25,
            'symmetric_l1_regularization': 0,
            'loss_type': 'logistic_loss',
        }
        tf.initialize_all_variables().run()

        model = SdcaModel(CONTAINER, examples, variables, options)
        unregularized_loss = model.unregularized_loss(examples)
        regularized_loss = model.regularized_loss(examples)
        prediction = model.predictions(examples)
        model.minimize().run()

        self.assertAllClose(0.266189, unregularized_loss.eval(),
                            rtol=3e-2, atol=3e-2)
        self.assertAllClose(0.571912, regularized_loss.eval(),
                            rtol=3e-2, atol=3e-2)

        # Threshold the predictions at 0.5 to obtain hard labels.
        cutoff = tf.ones_like(prediction) * 0.5
        predicted_labels = tf.cast(
            tf.greater_equal(prediction, cutoff), tf.float32)
        self.assertAllEqual([0, 1], predicted_labels.eval())
def to_absolute_coordinates(keypoints, height, width, check_range=True,
                            scope=None):
    """Converts normalized keypoint coordinates to absolute pixel coordinates.

    With `check_range` enabled, an assertion fails at run time when the
    maximum keypoint coordinate value is larger than 1.01 (in which case the
    coordinates are already absolute).

    Args:
      keypoints: A tensor of shape [num_instances, num_keypoints, 2].
      height: Maximum value for y coordinate of absolute keypoint
        coordinates.
      width: Maximum value for x coordinate of absolute keypoint coordinates.
      check_range: If True, checks if the coordinates are normalized or not.
      scope: name scope.

    Returns:
      tensor of shape [num_instances, num_keypoints, 2] with absolute
      coordinates in terms of the image size.
    """
    with tf.name_scope(scope, 'ToAbsoluteCoordinates'):
        height = tf.cast(height, tf.float32)
        width = tf.cast(width, tf.float32)

        if check_range:
            # Ensure range of input keypoints is correct.
            largest = tf.reduce_max(keypoints)
            range_check = tf.Assert(
                tf.greater_equal(1.01, largest),
                ['maximum keypoint coordinate value is larger '
                 'than 1.01: ', largest])
            # Routing width through an identity op ties the assertion to
            # the scaled result.
            with tf.control_dependencies([range_check]):
                width = tf.identity(width)

        return scale(keypoints, height, width)
def _has_enough_pixels_of_each_object_in_first_frame(
        label, decoder_output_stride):
    """Tells whether every label id covers enough pixels in the first frame.

    Reference frames in which an object (or the background) is visible in only
    a handful of pixels are rarely seen at test time and can hurt training,
    especially when more than the 1-nearest neighbor is used. This predicate
    makes it possible to detect and filter such samples.

    Args:
      label: Label tensor of shape [num_frames, height, width, 1].
      decoder_output_stride: Integer, the stride of the decoder output.

    Returns:
      Boolean tensor, True when each label id present in the downscaled first
      frame occurs at least MIN_LABEL_COUNT times.
    """
    height, width = train_utils.resolve_shape(label)[1:3]
    target_size = [
        model.scale_dimension(height, 1.0 / decoder_output_stride),
        model.scale_dimension(width, 1.0 / decoder_output_stride),
    ]

    # Downscale only the first frame, keeping a leading batch axis for the
    # resize op and dropping it again afterwards.
    first_frame = label[0, tf.newaxis]
    first_frame_small = tf.squeeze(
        tf.image.resize_nearest_neighbor(
            first_frame, target_size, align_corners=True),
        axis=0)

    flat_labels = tf.reshape(first_frame_small, [-1])
    _unique_ids, _positions, pixel_counts = tf.unique_with_counts(flat_labels)
    return tf.reduce_all(tf.greater_equal(pixel_counts, MIN_LABEL_COUNT))
def to_absolute_coordinates(boxlist, height, width,
                            check_range=True, scope=None):
    """Scales normalized box coordinates to absolute pixel coordinates.

    When `check_range` is set, an assertion is attached to the graph that
    fails if the largest box coordinate exceeds 1.01, which indicates that
    the coordinates are already absolute.

    Args:
      boxlist: BoxList with coordinates in range [0, 1].
      height: Maximum value for height of absolute box coordinates.
      width: Maximum value for width of absolute box coordinates.
      check_range: If True, asserts that the input coordinates are normalized.
      scope: name scope.

    Returns:
      boxlist with absolute coordinates in terms of the image size.
    """
    with tf.name_scope(scope, 'ToAbsoluteCoordinates'):
        height_f = tf.cast(height, tf.float32)
        width_f = tf.cast(width, tf.float32)

        if check_range:
            max_coordinate = tf.reduce_max(boxlist.get())
            is_normalized = tf.greater_equal(1.01, max_coordinate)
            failure_data = ['maximum box coordinate value is larger '
                            'than 1.01: ', max_coordinate]
            assert_op = tf.Assert(is_normalized, failure_data)
            # Route `width_f` through the assertion so the check runs as part
            # of the scaling computation.
            with tf.control_dependencies([assert_op]):
                width_f = tf.identity(width_f)

        return scale(boxlist, height_f, width_f)
def apply_stats(self, statsUpdates):
    """Builds the op that applies new statistics to the running averages.

    Asynchronous mode (`self._async_stats`): the stats ops are enqueued into a
    FIFO queue of capacity 1 driven by a `tf.train.QueueRunner` (stored on
    `self.qr_stats`); the returned op dequeues one item when the queue is not
    empty and is a no-op otherwise.

    Synchronous mode: once `self.stats_step >= self._stats_accum_iter` the
    running-average update is applied; before that the stats are accumulated
    with coefficient `1 / self._stats_accum_iter`.

    The op is also stored on `self._update_stats_op`.

    Args:
        statsUpdates: statistics forwarded unchanged to `self._apply_stats`.

    Returns:
        The stats update op.
    """
    def accumulate_stats():
        def accumulate_once():
            return tf.group(*self._apply_stats(
                statsUpdates,
                accumulate=True,
                accumulateCoeff=1. / self._stats_accum_iter))

        if not self._full_stats_init:
            return accumulate_once()
        # With full stats initialisation, only accumulate after the cold
        # iterations have passed.
        return tf.cond(tf.greater(self.sgd_step, self._cold_iter),
                       accumulate_once,
                       tf.no_op)

    def update_running_average():
        return tf.group(*self._apply_stats(statsUpdates))

    if self._async_stats:
        # asynchronous stats update
        pending_stats = self._apply_stats(statsUpdates)
        stat_dtypes = [stat.dtype for stat in pending_stats]
        stat_shapes = [stat.get_shape() for stat in pending_stats]
        queue = tf.FIFOQueue(1, stat_dtypes, shapes=stat_shapes)
        enqueue_op = queue.enqueue(pending_stats)
        self.qr_stats = tf.train.QueueRunner(queue, [enqueue_op])

        queue_is_empty = tf.equal(queue.size(), tf.convert_to_tensor(0))
        stats_op = tf.cond(queue_is_empty,
                           tf.no_op,
                           lambda: tf.group(queue.dequeue()))
    else:
        # synchronous stats update
        accumulation_finished = tf.greater_equal(
            self.stats_step, self._stats_accum_iter)
        stats_op = tf.cond(accumulation_finished,
                           update_running_average,
                           accumulate_stats)

    self._update_stats_op = stats_op
    return stats_op
def K(self, X1, X2=None):
    r"""
    Vectorized kernel calc.

    Computes the normalised Gram matrix between the strings in X1 and X2
    (or between X1 and itself when X2 is None). Every raw kernel value is
    divided by sqrt(k(x, x) * k(y, y)).

    Side effects: sets `self.symmetric` and calls `self._precalc()`.

    Args:
        X1: string tensor; axis 1 is squeezed away, so it is expected to be
            a singleton axis. Each string is split by `tf.strings.split`
            (whitespace by default) into tokens.
        X2: optional second string tensor with the same layout. When None,
            the kernel of X1 against itself is computed.

    Returns:
        float64 tensor of shape (number of strings in X1, number of strings
        in X2) holding the normalised kernel values.
    """
    # Turn our inputs into lists of integers using one-hot embedding
    # first split up strings and pad to fixed length and prep for gpu
    # pad until all have length of self.maxlen
    # turn into one-hot i.e. shape (# strings, #characters+1, alphabet size)
    # tf.strings.bytes_split alternatively
    X1 = tf.strings.split(tf.squeeze(X1, 1)).to_tensor("PAD", shape=[None, self.maxlen])
    X1 = self.table.lookup(X1)
    # keep track of original input sizes
    X1_shape = tf.shape(X1)[0]
    X1 = tf.one_hot(X1, self.alphabet_size + 1, dtype=tf.float64)
    if X2 is None:
        X2 = X1
        X2_shape = X1_shape
        self.symmetric = True
    else:
        self.symmetric = False
        X2 = tf.strings.split(tf.squeeze(X2, 1)).to_tensor("PAD", shape=[None, self.maxlen])
        X2 = self.table.lookup(X2)
        X2_shape = tf.shape(X2)[0]
        X2 = tf.one_hot(X2, self.alphabet_size + 1, dtype=tf.float64)

    # prep the decay tensors
    self._precalc()

    # combine all target strings and remove the ones in the first column that
    # encode the padding (i.e. we don't want them to count as a match)
    # (presumably self.table maps "PAD" to id 0 - TODO confirm)
    X_full = tf.concat([X1, X2], 0)[:, :, 1:]

    # get indices of all possible pairings from X and X2
    # this way allows maximum number of kernel calcs to be squished onto the
    # GPU (rather than just doing individual rows of gram)
    # Column 0 of `indicies` indexes the X2 half of X_full, column 1 the X1
    # half.
    indicies_2, indicies_1 = tf.meshgrid(tf.range(0, X1_shape), tf.range(X1_shape, tf.shape(X_full)[0]))
    indicies = tf.concat([tf.reshape(indicies_1, (-1, 1)), tf.reshape(indicies_2, (-1, 1))], axis=1)
    if self.symmetric:
        # if symmetric then only calc upper matrix (fill in rest later)
        indicies = tf.boolean_mask(indicies, tf.greater_equal(indicies[:, 1] + X1_shape, indicies[:, 0]))
    else:
        # if not symmetric need to calculate some extra kernel evals for the
        # normalization later on: every string of X_full paired with itself
        # is appended after the X1-by-X2 pairs
        indicies = tf.concat([indicies, tf.tile(tf.expand_dims(tf.range(tf.shape(X_full)[0]), 1), (1, 2))], 0)

    # make kernel calcs in batches
    num_batches = tf.cast(tf.math.ceil(tf.shape(indicies)[0] / self.batch_size), dtype=tf.int32)
    k_split = tf.TensorArray(tf.float64, size=num_batches, clear_after_read=False, infer_shape=False)

    # iterate through batches
    # NOTE(review): looping over tf.range presumably relies on eager
    # execution or AutoGraph (tf.function) - confirm how K is invoked.
    for j in tf.range(num_batches):
        # collect strings for this batch
        indicies_batch = indicies[self.batch_size * j:self.batch_size * (j + 1)]
        X_batch = tf.gather(X_full, indicies_batch[:, 0], axis=0)
        X2_batch = tf.gather(X_full, indicies_batch[:, 1], axis=0)

        # Make S: the similarity tensor of shape
        # (# strings, #characters, # characters)
        # S = tf.matmul( tf.matmul(X_batch,self.sim),tf.transpose(X2_batch,perm=(0,2,1)))
        S = tf.matmul(X_batch, tf.transpose(X2_batch, perm=(0, 2, 1)))
        # collect results for the batch
        result = self.kernel_calc(S)
        k_split = k_split.write(j, result)

    # combine batch results
    k = tf.expand_dims(k_split.concat(), 1)
    k_split.close()

    # put results into the right places in the gram matrix and normalize
    if self.symmetric:
        # if symmetric then only put in top triangle (inc diag)
        mask = tf.linalg.band_part(tf.ones((X1_shape, X2_shape), dtype=tf.int64), 0, -1)
        non_zero = tf.not_equal(mask, tf.constant(0, dtype=tf.int64))

        # Extracting the indices of upper triangle elements
        indices = tf.where(non_zero)
        out = tf.SparseTensor(indices, tf.squeeze(k), dense_shape=tf.cast((X1_shape, X2_shape), dtype=tf.int64))
        k_results = tf.sparse.to_dense(out)

        # add in missing elements (lower triangle); the diagonal of the
        # transposed copy is zeroed so it is not counted twice
        k_results = k_results + tf.linalg.set_diag(tf.transpose(k_results), tf.zeros(X1_shape, dtype=tf.float64))

        # normalise
        X_diag_Ks = tf.linalg.diag_part(k_results)
        norm = tf.tensordot(X_diag_Ks, X_diag_Ks, axes=0)
        k_results = tf.divide(k_results, tf.sqrt(norm))
    else:
        # otherwise can just reshape into gram matrix
        # but first take extra kernel calcs off end of k and use them to
        # normalise
        X_diag_Ks = tf.reshape(k[X1_shape * X2_shape:X1_shape * X2_shape + X1_shape], (-1,))
        X2_diag_Ks = tf.reshape(k[-X2_shape:], (-1,))
        k = k[0:X1_shape * X2_shape]
        k_results = tf.transpose(tf.reshape(k, [X2_shape, X1_shape]))

        # normalise
        norm = tf.tensordot(X_diag_Ks, X2_diag_Ks, axes=0)
        k_results = tf.divide(k_results, tf.sqrt(norm))

    return k_results
def _build(self, image, gt_boxes=None, is_training=False):
    """
    Returns bounding boxes and classification probabilities.

    Args:
        image: A tensor with the image.
            Its shape should be `(height, width, 3)`.
        gt_boxes: A tensor with all the ground truth boxes of that image.
            Its shape should be `(num_gt_boxes, 5)`
            Where for each gt box we have (x1, y1, x2, y2, label),
            in that order.
        is_training: A boolean to whether or not it is used for training.

    Returns:
        A dictionary with the following keys:
        target: Only present when `gt_boxes` is given. A dictionary with:
            cls: class targets for the anchors kept by the hard negative
                mining filter.
            bbox_offsets: bbox offset targets for the same anchors.
            anchors: the anchors kept by the filter.
        cls_pred: A tensor with the predicted class scores (logits).
        loc_pred: A tensor with the predicted bbox offsets.
        classification_prediction: Output of `SSDProposal`. Only present
            when not training or when debug is enabled.
        all_anchors: Anchors used for the proposals (debug only).
        cls_prob: Softmax class probabilities (debug only).

        When `gt_boxes` is given, the predictions and anchors above are the
        ones remaining after the hard negative mining filter.
    """
    # Reshape image.
    # Add channels to the stored (height, width) shape. The append is guarded
    # so that building the module more than once does not keep growing
    # `self.image_shape` (which would break `set_shape` and `clip_boxes`).
    if len(self.image_shape) == 2:
        self.image_shape.append(3)
    image.set_shape(self.image_shape)
    image = tf.expand_dims(image, 0, name="hardcode_batch_size_to_1")

    # Generate feature maps from image
    self.feature_extractor = SSDFeatureExtractor(
        self._config.base_network, parent_name=self.module_name)
    feature_maps = self.feature_extractor(image, is_training=is_training)

    # Build a MultiBox predictor on top of each feature layer and collect
    # the bounding box offsets and the category score logits they produce
    bbox_offsets_list = []
    class_scores_list = []
    for i, feat_map in enumerate(feature_maps.values()):
        multibox_predictor_name = "MultiBox_{}".format(i)
        with tf.name_scope(multibox_predictor_name):
            num_anchors = self._anchors_per_point[i]

            # Predict bbox offsets
            bbox_offsets_layer = Conv2D(
                num_anchors * 4,
                [3, 3],
                name=multibox_predictor_name + "_offsets_conv",
            )(feat_map)
            bbox_offsets_flattened = tf.reshape(bbox_offsets_layer, [-1, 4])
            bbox_offsets_list.append(bbox_offsets_flattened)

            # Predict class scores
            class_scores_layer = Conv2D(
                num_anchors * (self._num_classes + 1),
                [3, 3],
                name=multibox_predictor_name + "_classes_conv",
            )(feat_map)
            class_scores_flattened = tf.reshape(
                class_scores_layer, [-1, self._num_classes + 1])
            class_scores_list.append(class_scores_flattened)

    bbox_offsets = tf.concat(bbox_offsets_list, axis=0,
                             name="concatenate_all_bbox_offsets")
    class_scores = tf.concat(class_scores_list, axis=0,
                             name="concatenate_all_class_scores")
    class_probabilities = tf.nn.softmax(class_scores, axis=-1,
                                        name="class_probabilities_softmax")

    # Generate anchors (generated only once, therefore we use numpy)
    raw_anchors_per_featmap = generate_raw_anchors(
        feature_maps,
        self._anchor_min_scale,
        self._anchor_max_scale,
        self._anchor_ratios,
        self._anchors_per_point,
    )
    anchors_list = []
    for feat_map_name, feat_map in feature_maps.items():
        # TODO: Anchor generation should be simpler. We should create
        #       them in image scale from the start instead of scaling
        #       them to their feature map size.
        feat_map_shape = feat_map.shape.as_list()[1:3]
        scaled_bboxes = adjust_bboxes(
            raw_anchors_per_featmap[feat_map_name],
            feat_map_shape[0],
            feat_map_shape[1],
            self.image_shape[0],
            self.image_shape[1],
        )
        clipped_bboxes = clip_boxes(scaled_bboxes, self.image_shape)
        anchors_list.append(clipped_bboxes)
    anchors = np.concatenate(anchors_list, axis=0)
    anchors = tf.convert_to_tensor(anchors, dtype=tf.float32)

    # This is the dict we'll return after filling it with SSD's results
    prediction_dict = {}

    # Generate targets for training
    if gt_boxes is not None:
        gt_boxes = tf.cast(gt_boxes, tf.float32)

        # Generate targets
        target_creator = SSDTarget(self._num_classes,
                                   self._config.target,
                                   self._config.variances)
        class_targets, bbox_offsets_targets = target_creator(
            class_probabilities, anchors, gt_boxes)

        # Filter the predictions and targets that we will ignore during
        # training due to hard negative mining. We use class_targets to
        # know which ones to ignore (they are marked as -1 if they are to
        # be ignored)
        with tf.name_scope("hard_negative_mining_filter"):
            predictions_filter = tf.greater_equal(class_targets, 0)

            anchors = tf.boolean_mask(anchors, predictions_filter)
            bbox_offsets_targets = tf.boolean_mask(bbox_offsets_targets,
                                                   predictions_filter)
            class_targets = tf.boolean_mask(class_targets,
                                            predictions_filter)
            class_scores = tf.boolean_mask(class_scores,
                                           predictions_filter)
            class_probabilities = tf.boolean_mask(class_probabilities,
                                                  predictions_filter)
            bbox_offsets = tf.boolean_mask(bbox_offsets,
                                           predictions_filter)

        # Add target tensors to prediction dict
        prediction_dict["target"] = {
            "cls": class_targets,
            "bbox_offsets": bbox_offsets_targets,
            "anchors": anchors,
        }

    # Add network's raw output to prediction dict
    prediction_dict["cls_pred"] = class_scores
    prediction_dict["loc_pred"] = bbox_offsets

    # We generate proposals when predicting, or when debug=True for
    # generating visualizations during training.
    if not is_training or self._debug:
        proposals_creator = SSDProposal(self._num_classes,
                                        self._config.proposals,
                                        self._config.variances)
        proposals = proposals_creator(
            class_probabilities,
            bbox_offsets,
            anchors,
            tf.cast(tf.shape(image)[1:3], tf.float32),
        )
        prediction_dict["classification_prediction"] = proposals

    # Add some non essential metrics for debugging
    if self._debug:
        prediction_dict["all_anchors"] = anchors
        prediction_dict["cls_prob"] = class_probabilities

    return prediction_dict
def inception_model_fn(features, labels, mode, params):
    """Inception v3 model using Estimator API.

    Args:
      features: input tensor; it is first passed through
        `tensor_transform_fn` together with `params['input_perm']`.
      labels: label tensor; one-hot encoded with `FLAGS.num_classes` classes.
      mode: one of the `tf.estimator.ModeKeys` values.
      params: dict that must provide the key `'input_perm'`.

    Returns:
      A `tf.estimator.EstimatorSpec` carrying only the predictions in PREDICT
      mode; otherwise a `TPUEstimatorSpec` with the loss and, depending on the
      mode, the train op plus host call (TRAIN) or the eval metrics (EVAL).

    Raises:
      ValueError: if `FLAGS.optimizer` is not one of 'sgd', 'momentum', 'RMS'.
    """
    num_classes = FLAGS.num_classes
    is_training = (mode == tf.estimator.ModeKeys.TRAIN)
    is_eval = (mode == tf.estimator.ModeKeys.EVAL)
    features = tensor_transform_fn(features, params['input_perm'])

    if FLAGS.clear_update_collections:
        # updates_collections must be set to None in order to use fused
        # batchnorm
        with arg_scope(
                inception.inception_v3_arg_scope(
                    batch_norm_decay=BATCH_NORM_DECAY,
                    batch_norm_epsilon=BATCH_NORM_EPSILON,
                    updates_collections=None)):
            logits, end_points = inception.inception_v3(
                features, num_classes, is_training=is_training)
    else:
        with arg_scope(
                inception.inception_v3_arg_scope(
                    batch_norm_decay=BATCH_NORM_DECAY,
                    batch_norm_epsilon=BATCH_NORM_EPSILON)):
            logits, end_points = inception.inception_v3(
                features, num_classes, is_training=is_training)

    # `predictions` aliases `end_points`, so the two extra entries are added
    # to the end-point dictionary itself.
    predictions = end_points
    predictions.update({
        'classes': tf.argmax(input=logits, axis=1),
        'probabilities': tf.nn.softmax(logits, name='softmax_tensor')
    })

    if mode == tf.estimator.ModeKeys.PREDICT:
        return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)

    if mode == tf.estimator.ModeKeys.EVAL and FLAGS.display_tensors and (
            not FLAGS.use_tpu):
        # Print predictions and labels side by side; the label print depends
        # on the prediction print so both are emitted together.
        with tf.control_dependencies([
                tf.Print(predictions['classes'], [predictions['classes']],
                         summarize=FLAGS.eval_batch_size,
                         message='prediction: ')
        ]):
            labels = tf.Print(labels, [labels],
                              summarize=FLAGS.eval_batch_size,
                              message='label: ')

    one_hot_labels = tf.one_hot(labels, num_classes, dtype=tf.int32)

    if 'AuxLogits' in end_points:
        tf.losses.softmax_cross_entropy(onehot_labels=one_hot_labels,
                                        logits=end_points['AuxLogits'],
                                        weights=0.4,
                                        label_smoothing=0.1,
                                        scope='aux_loss')

    tf.losses.softmax_cross_entropy(onehot_labels=one_hot_labels,
                                    logits=logits,
                                    weights=1.0,
                                    label_smoothing=0.1)
    loss = tf.losses.get_total_loss(add_regularization_losses=True)

    initial_learning_rate = FLAGS.learning_rate * FLAGS.train_batch_size / 256
    if FLAGS.use_learning_rate_warmup:
        # Adjust initial learning rate to match final warmup rate
        warmup_decay = FLAGS.learning_rate_decay**(
            (FLAGS.warmup_epochs + FLAGS.cold_epochs) /
            FLAGS.learning_rate_decay_epochs)
        adj_initial_learning_rate = initial_learning_rate * warmup_decay

    final_learning_rate = 0.0001 * initial_learning_rate

    host_call = None
    train_op = None
    if is_training:
        batches_per_epoch = _NUM_TRAIN_IMAGES / FLAGS.train_batch_size
        global_step = tf.train.get_or_create_global_step()
        current_epoch = tf.cast(
            (tf.cast(global_step, tf.float32) / batches_per_epoch), tf.int32)

        learning_rate = tf.train.exponential_decay(
            learning_rate=initial_learning_rate,
            global_step=global_step,
            decay_steps=int(FLAGS.learning_rate_decay_epochs *
                            batches_per_epoch),
            decay_rate=FLAGS.learning_rate_decay,
            staircase=True)

        if FLAGS.use_learning_rate_warmup:
            # Constant rate `wlr` during the cold epochs, then a linear ramp,
            # then the exponential decay schedule from `exp_decay_start` on.
            wlr = 0.1 * adj_initial_learning_rate
            wlr_height = tf.cast(
                0.9 * adj_initial_learning_rate /
                (FLAGS.warmup_epochs + FLAGS.learning_rate_decay_epochs - 1),
                tf.float32)
            epoch_offset = tf.cast(FLAGS.cold_epochs - 1, tf.int32)
            exp_decay_start = (FLAGS.warmup_epochs + FLAGS.cold_epochs +
                               FLAGS.learning_rate_decay_epochs)
            lin_inc_lr = tf.add(
                wlr,
                tf.multiply(
                    tf.cast(tf.subtract(current_epoch, epoch_offset),
                            tf.float32),
                    wlr_height))
            learning_rate = tf.where(
                tf.greater_equal(current_epoch, FLAGS.cold_epochs),
                (tf.where(tf.greater_equal(current_epoch, exp_decay_start),
                          learning_rate, lin_inc_lr)),
                wlr)

        # Set a minimum boundary for the learning rate.
        learning_rate = tf.maximum(learning_rate,
                                   final_learning_rate,
                                   name='learning_rate')

        if FLAGS.optimizer == 'sgd':
            tf.logging.info('Using SGD optimizer')
            optimizer = tf.train.GradientDescentOptimizer(
                learning_rate=learning_rate)
        elif FLAGS.optimizer == 'momentum':
            tf.logging.info('Using Momentum optimizer')
            optimizer = tf.train.MomentumOptimizer(learning_rate=learning_rate,
                                                   momentum=0.9)
        elif FLAGS.optimizer == 'RMS':
            tf.logging.info('Using RMS optimizer')
            optimizer = tf.train.RMSPropOptimizer(learning_rate,
                                                  RMSPROP_DECAY,
                                                  momentum=RMSPROP_MOMENTUM,
                                                  epsilon=RMSPROP_EPSILON)
        else:
            # The message needs a %s placeholder for the logging argument, and
            # execution must stop here: `optimizer` would otherwise be unbound
            # a few lines below.
            tf.logging.fatal('Unknown optimizer: %s', FLAGS.optimizer)
            raise ValueError('Unknown optimizer: %s' % FLAGS.optimizer)

        if FLAGS.use_tpu:
            optimizer = tpu_optimizer.CrossShardOptimizer(optimizer)

        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        with tf.control_dependencies(update_ops):
            train_op = optimizer.minimize(loss, global_step=global_step)
        if FLAGS.moving_average:
            ema = tf.train.ExponentialMovingAverage(decay=MOVING_AVERAGE_DECAY,
                                                    num_updates=global_step)
            variables_to_average = (tf.trainable_variables() +
                                    tf.moving_average_variables())
            with tf.control_dependencies([train_op
                                          ]), tf.name_scope('moving_average'):
                train_op = ema.apply(variables_to_average)

        # To log the loss, current learning rate, and epoch for Tensorboard,
        # the summary op needs to be run on the host CPU via host_call.
        # host_call expects [batch_size, ...] Tensors, thus reshape to
        # introduce a batch dimension. These Tensors are implicitly
        # concatenated to [params['batch_size']].
        gs_t = tf.reshape(global_step, [1])
        loss_t = tf.reshape(loss, [1])
        lr_t = tf.reshape(learning_rate, [1])
        ce_t = tf.reshape(current_epoch, [1])

        def host_call_fn(gs, loss, lr, ce):
            """Training host call. Creates scalar summaries for training metrics.

            This function is executed on the CPU and should not directly
            reference any Tensors in the rest of the `model_fn`. To pass
            Tensors from the model to the `metric_fn`, provide as part of the
            `host_call`. See
            https://www.tensorflow.org/api_docs/python/tf/contrib/tpu/TPUEstimatorSpec
            for more information.

            Arguments should match the list of `Tensor` objects passed as the
            second element in the tuple passed to `host_call`.

            Args:
              gs: `Tensor` with shape `[batch]` for the global_step.
              loss: `Tensor` with shape `[batch]` for the training loss.
              lr: `Tensor` with shape `[batch]` for the learning_rate.
              ce: `Tensor` with shape `[batch]` for the current_epoch.

            Returns:
              List of summary ops to run on the CPU host.
            """
            gs = gs[0]
            with summary.create_file_writer(FLAGS.model_dir).as_default():
                with summary.always_record_summaries():
                    summary.scalar('loss', tf.reduce_mean(loss), step=gs)
                    summary.scalar('learning_rate', tf.reduce_mean(lr),
                                   step=gs)
                    summary.scalar('current_epoch', tf.reduce_mean(ce),
                                   step=gs)

                    return summary.all_summary_ops()

        host_call = (host_call_fn, [gs_t, loss_t, lr_t, ce_t])

    eval_metrics = None
    if is_eval:

        def metric_fn(labels, logits):
            """Evaluation metric function. Evaluates accuracy.

            This function is executed on the CPU and should not directly
            reference any Tensors in the rest of the `model_fn`. To pass
            Tensors from the model to the `metric_fn`, provide as part of the
            `eval_metrics`. See
            https://www.tensorflow.org/api_docs/python/tf/contrib/tpu/TPUEstimatorSpec
            for more information.

            Arguments should match the list of `Tensor` objects passed as the
            second element in the tuple passed to `eval_metrics`.

            Args:
              labels: `Tensor` with shape `[batch, ]`.
              logits: `Tensor` with shape `[batch, num_classes]`.

            Returns:
              A dict of the metrics to return from evaluation.
            """
            predictions = tf.argmax(logits, axis=1)
            top_1_accuracy = tf.metrics.accuracy(labels, predictions)
            in_top_5 = tf.cast(tf.nn.in_top_k(logits, labels, 5), tf.float32)
            top_5_accuracy = tf.metrics.mean(in_top_5)

            return {
                'accuracy': top_1_accuracy,
                'accuracy@5': top_5_accuracy,
            }

        eval_metrics = (metric_fn, [labels, logits])

    return tpu_estimator.TPUEstimatorSpec(mode=mode,
                                          loss=loss,
                                          train_op=train_op,
                                          host_call=host_call,
                                          eval_metrics=eval_metrics)
def build_losses(pyramid, py_scope, slim_scope, image_height, image_width, outputs, gt_boxes, gt_masks, num_classes, base_anchors, rpn_box_lw=0.1, rpn_cls_lw=0.1, rcnn_box_lw=1.0, rcnn_cls_lw=0.1, mask_lw=1.0):
    """Building 3-way output losses, totally 5 losses

    Every individual loss tensor is also added to the
    `tf.GraphKeys.LOSSES` collection, and several `training_*` entries are
    written into `outputs`.

    Params:
    ------
    pyramid: dict of feature maps indexed by 'P2' ... 'P5'
    py_scope: name scope to build the losses under
    slim_scope: slim arg scope to build the losses under
    image_height, image_width: forwarded to the anchor encoder
    outputs: output of build_heads
    gt_boxes: A tensor of shape (G, 5), [x1, y1, x2, y2, class]
    gt_masks: A tensor of shape (G, ih, iw), values in {0, 1}
    num_classes: number of classes used by the rcnn and mask heads
    base_anchors: number of anchors per position (used to reshape rpn cls)
    *_lw: loss weight of rpn, rcnn and mask losses

    Returns:
    -------
    total_loss: sum of the five losses below
    losses: list [rpn_box, rpn_cls, rcnn_box, rcnn_cls, mask], each one the
        sum of the corresponding per-step losses
    batch_info: list [rpn_batch_pos, rpn_batch, rcnn_batch_pos, rcnn_batch,
        mask_batch_pos, mask_batch] counting samples with target >= 1
        (`*_pos`) and target >= 0 after filtering
    """
    # losses for pyramid
    losses = []
    rpn_box_losses, rpn_cls_losses = [], []
    rcnn_box_losses, rcnn_cls_losses = [], []
    mask_losses = []

    # watch some info during training
    rpn_batch = []
    rcnn_batch = []
    mask_batch = []
    rpn_batch_pos = []
    rcnn_batch_pos = []
    mask_batch_pos = []

    # if _BN is True:
    #     arg_scope = _extra_conv_arg_scope_with_bn()
    #     # arg_scope = _extra_conv_arg_scope_with_bn(is_training=True)
    # else:
    #     arg_scope = _extra_conv_arg_scope(activation_fn=tf.nn.relu)
    with tf.name_scope(py_scope) as py_scope:
        with slim.arg_scope(slim_scope) as slim_scope:
            ## assigning gt_boxes
            [assigned_gt_boxes, assigned_layer_inds] = assign_boxes(gt_boxes, [gt_boxes], [2, 3, 4, 5])

            ## build losses for PFN
            # Pyramid levels are visited from P5 down to P2; level i reads
            # assigned_gt_boxes[i - 2].
            for i in range(5, 1, -1):
                p = 'P%d' % i
                stride = 2**i
                shape = tf.shape(pyramid[p])
                height, width = shape[1], shape[2]

                splitted_gt_boxes = assigned_gt_boxes[i - 2]

                ### rpn losses
                # 1. encode ground truth
                # 2. compute distances
                all_anchors = outputs['rpn'][p]['anchor']
                rpn_boxes = outputs['rpn'][p]['box']
                rpn_clses = tf.reshape(outputs['rpn'][p]['cls'], (1, height, width, base_anchors, 2))

                rpn_clses_target, rpn_boxes_target, rpn_boxes_inside_weight = \
                    anchor_encoder(splitted_gt_boxes, all_anchors, height, width, stride, image_height, image_width, scope='AnchorEncoder')

                rpn_clses_target, rpn_clses, rpn_boxes, rpn_boxes_target, rpn_boxes_inside_weight = \
                    _filter_negative_samples(tf.reshape(rpn_clses_target, [-1]), [
                        tf.reshape(rpn_clses_target, [-1]),
                        tf.reshape(rpn_clses, [-1, 2]),
                        tf.reshape(rpn_boxes, [-1, 4]),
                        tf.reshape(rpn_boxes_target, [-1, 4]),
                        tf.reshape(rpn_boxes_inside_weight, [-1, 4])
                    ])

                rpn_batch.append(
                    tf.reduce_sum(
                        tf.cast(tf.greater_equal(rpn_clses_target, 0), tf.float32)))
                rpn_batch_pos.append(
                    tf.reduce_sum(
                        tf.cast(tf.greater_equal(rpn_clses_target, 1), tf.float32)))

                rpn_box_loss = rpn_boxes_inside_weight * _smooth_l1_dist(
                    rpn_boxes, rpn_boxes_target)
                rpn_box_loss = tf.reshape(rpn_box_loss, [-1, 4])
                rpn_box_loss = tf.reduce_sum(rpn_box_loss, axis=1)
                rpn_box_loss = rpn_box_lw * tf.reduce_mean(rpn_box_loss)
                tf.add_to_collection(tf.GraphKeys.LOSSES, rpn_box_loss)
                rpn_box_losses.append(rpn_box_loss)

                ### NOTE: examples with negative labels are ignored when computing one_hot_encoding and entropy losses
                # BUT these examples still count when computing the average of softmax_cross_entropy,
                # the loss becomes smaller by a factor (non_negative_labels / all_labels)
                # the BEST practice still should be gathering all non-negative examples
                rpn_clses_target = slim.one_hot_encoding(
                    rpn_clses_target, 2, on_value=1.0, off_value=0.0)  # this will set -1 label to all zeros
                rpn_cls_loss = rpn_cls_lw * tf.nn.softmax_cross_entropy_with_logits(
                    labels=rpn_clses_target, logits=rpn_clses)
                rpn_cls_loss = tf.reduce_mean(rpn_cls_loss)
                tf.add_to_collection(tf.GraphKeys.LOSSES, rpn_cls_loss)
                rpn_cls_losses.append(rpn_cls_loss)

            # NOTE(review): the rcnn and mask sections below only read
            # level-independent entries of `outputs`, so they are expected to
            # run once, after the pyramid loop - confirm the nesting.

            ### rcnn losses
            # 1. encode ground truth
            # 2. compute distances
            rcnn_ordered_rois = outputs['rcnn_ordered_rois']
            rcnn_boxes = outputs['rcnn_boxes']
            rcnn_clses = outputs['rcnn_clses']
            rcnn_scores = outputs['rcnn_scores']

            rcnn_clses_target, rcnn_boxes_target, rcnn_boxes_inside_weight = \
                roi_encoder(gt_boxes, rcnn_ordered_rois, num_classes, scope='ROIEncoder')

            rcnn_clses_target, rcnn_ordered_rois, rcnn_clses, rcnn_scores, rcnn_boxes, rcnn_boxes_target, rcnn_boxes_inside_weight = \
                _filter_negative_samples(tf.reshape(rcnn_clses_target, [-1]),[
                    tf.reshape(rcnn_clses_target, [-1]),
                    tf.reshape(rcnn_ordered_rois, [-1, 4]),
                    tf.reshape(rcnn_clses, [-1, num_classes]),
                    tf.reshape(rcnn_scores, [-1, num_classes]),
                    tf.reshape(rcnn_boxes, [-1, num_classes * 4]),
                    tf.reshape(rcnn_boxes_target, [-1, num_classes * 4]),
                    tf.reshape(rcnn_boxes_inside_weight, [-1, num_classes * 4])
                ] )

            rcnn_batch.append(
                tf.reduce_sum(
                    tf.cast(tf.greater_equal(rcnn_clses_target, 0), tf.float32)))
            rcnn_batch_pos.append(
                tf.reduce_sum(
                    tf.cast(tf.greater_equal(rcnn_clses_target, 1), tf.float32)))

            rcnn_box_loss = rcnn_boxes_inside_weight * _smooth_l1_dist(
                rcnn_boxes, rcnn_boxes_target)
            rcnn_box_loss = tf.reshape(rcnn_box_loss, [-1, 4])
            rcnn_box_loss = tf.reduce_sum(rcnn_box_loss, axis=1)
            rcnn_box_loss = rcnn_box_lw * tf.reduce_mean(
                rcnn_box_loss)  # * frac_
            tf.add_to_collection(tf.GraphKeys.LOSSES, rcnn_box_loss)
            rcnn_box_losses.append(rcnn_box_loss)

            rcnn_clses_target = slim.one_hot_encoding(rcnn_clses_target, num_classes, on_value=1.0, off_value=0.0)
            rcnn_cls_loss = rcnn_cls_lw * tf.nn.softmax_cross_entropy_with_logits(
                labels=rcnn_clses_target, logits=rcnn_clses)
            rcnn_cls_loss = tf.reduce_mean(rcnn_cls_loss)  # * frac_
            tf.add_to_collection(tf.GraphKeys.LOSSES, rcnn_cls_loss)
            rcnn_cls_losses.append(rcnn_cls_loss)

            # Expose the filtered training samples to the caller.
            outputs['training_rcnn_rois'] = rcnn_ordered_rois
            outputs['training_rcnn_clses_target'] = rcnn_clses_target
            outputs['training_rcnn_clses'] = rcnn_clses
            outputs['training_rcnn_scores'] = rcnn_scores

            ### mask loss
            # mask of shape (N, h, w, num_classes)
            mask_ordered_rois = outputs['mask_ordered_rois']
            masks = outputs['mask_mask']

            mask_clses_target, mask_targets, mask_inside_weights, mask_rois = \
                mask_encoder(gt_masks, gt_boxes, mask_ordered_rois, num_classes, 28, 28,scope='MaskEncoder')

            mask_clses_target, mask_targets, mask_inside_weights, mask_rois, masks = \
                _filter_negative_samples(tf.reshape(mask_clses_target, [-1]), [
                    tf.reshape(mask_clses_target, [-1]),
                    tf.reshape(mask_targets, [-1, 28, 28, num_classes]),
                    tf.reshape(mask_inside_weights, [-1, 28, 28, num_classes]),
                    tf.reshape(mask_rois, [-1, 4]),
                    tf.reshape(masks, [-1, 28, 28, num_classes]),
                ])

            mask_batch.append(
                tf.reduce_sum(
                    tf.cast(tf.greater_equal(mask_clses_target, 0), tf.float32)))
            mask_batch_pos.append(
                tf.reduce_sum(
                    tf.cast(tf.greater_equal(mask_clses_target, 1), tf.float32)))

            ### NOTE: w/o competition between classes.
            mask_loss = mask_inside_weights * tf.nn.sigmoid_cross_entropy_with_logits(
                labels=mask_targets, logits=masks)
            mask_loss = mask_lw * mask_loss
            mask_loss = tf.reduce_mean(mask_loss)
            # Fall back to a zero loss when no mask sample is left after
            # filtering.
            mask_loss = tf.cond(tf.greater(tf.size(mask_clses_target), 0), lambda: mask_loss, lambda: tf.constant(0.0))
            tf.add_to_collection(tf.GraphKeys.LOSSES, mask_loss)
            mask_losses.append(mask_loss)

            outputs['training_mask_rois'] = mask_rois
            outputs['training_mask_clses_target'] = mask_clses_target
            outputs['training_mask_final_mask'] = tf.nn.sigmoid(masks)
            outputs['training_mask_final_mask_target'] = mask_targets

    # Collapse the per-step lists into one tensor per loss type.
    rpn_box_losses = tf.add_n(rpn_box_losses)
    rpn_cls_losses = tf.add_n(rpn_cls_losses)
    rcnn_box_losses = tf.add_n(rcnn_box_losses)
    rcnn_cls_losses = tf.add_n(rcnn_cls_losses)
    mask_losses = tf.add_n(mask_losses)
    losses = [
        rpn_box_losses, rpn_cls_losses, rcnn_box_losses, rcnn_cls_losses,
        mask_losses
    ]
    total_loss = tf.add_n(losses)

    rpn_batch = tf.cast(tf.add_n(rpn_batch), tf.float32)
    rcnn_batch = tf.cast(tf.add_n(rcnn_batch), tf.float32)
    mask_batch = tf.cast(tf.add_n(mask_batch), tf.float32)
    rpn_batch_pos = tf.cast(tf.add_n(rpn_batch_pos), tf.float32)
    rcnn_batch_pos = tf.cast(tf.add_n(rcnn_batch_pos), tf.float32)
    mask_batch_pos = tf.cast(tf.add_n(mask_batch_pos), tf.float32)

    return total_loss, losses, [rpn_batch_pos, rpn_batch, \
                                rcnn_batch_pos, rcnn_batch, \
                                mask_batch_pos, mask_batch]
###############################################################################
# Draw two random scalars (an empty shape list yields a 0-d tensor) and
# return x + y when x > y, x - y otherwise.
x = tf.random_uniform(shape=[])
y = tf.random_uniform(shape=[])
out = tf.cond(pred=tf.greater(x, y),
              true_fn=lambda: x + y,
              false_fn=lambda: x - y)
print(sess.run(out))

###############################################################################
# 1b: Create two 0-d tensors x and y randomly selected from the range [-1, 1).
# Return x + y if x < y, x - y if x > y, 0 otherwise.
# Hint: Look up tf.case().
###############################################################################

# A single tf.cond is enough here: in the "otherwise" case x equals y, so the
# x - y branch already evaluates to 0.
x = tf.random_uniform(shape=[], minval=-1, maxval=1)
y = tf.random_uniform(shape=[], minval=-1, maxval=1)
out = tf.cond(pred=tf.less(x, y),
              true_fn=lambda: x + y,
              false_fn=lambda: x - y)
print(sess.run(out))

###############################################################################
# 1c: Create the tensor x of the value [[0, -2, -1], [0, 1, 2]]
# and y as a tensor of zeros with the same shape as x.
# Return a boolean tensor that yields Trues if x equals y element-wise.
# Hint: Look up tf.equal().
###############################################################################

x = tf.constant([
    [0, -2, -1],
    [0, 1, 2],
])
y = tf.zeros_like(x)
out = tf.equal(x, y)
print(sess.run(out))

###############################################################################
def _network(inputs, image_shape, gt_bboxes):
    """Builds the Faster R-CNN graph: backbone, RPN, RCNN head and summaries.

    Args:
        inputs: batch of input images; only `inputs[0]` is used for the
            image summaries.
        image_shape: image shape forwarded to `rpn_batch` and
            `batchwise_process_faster_rcnn`.
        gt_bboxes: ground-truth boxes forwarded to `rpn_batch`.
            NOTE(review): column 0 looks like the batch index of each box and
            the remaining 5 columns the box plus its class - confirm.

    Returns:
        Tuple `(final_bbox, final_score, final_categories, loss_dict,
        acc_dict)`. The first three are the detections of the first image of
        the batch only; `loss_dict` holds the RPN and RCNN classification and
        bbox losses, `acc_dict` the RPN and RCNN classification accuracies.
    """
    # Make the backbone package importable and load the configured backbone.
    if 'backbones' not in sys.path:
        sys.path.append('backbones')
    cnn = import_module(frc.BACKBONE, package='backbones')
    # CNN
    feature_map = cnn.inference(inputs)

    features = slim.conv2d(feature_map, 512, [3, 3], normalizer_fn=slim.batch_norm,
                           normalizer_params={'decay': 0.995, 'epsilon': 0.0001},
                           weights_regularizer=slim.l2_regularizer(frc.L2_WEIGHT),
                           scope='rpn_feature')

    # RPN
    rpn_cls_loss, rpn_cls_acc, rpn_bbox_loss, rois, labels, bbox_targets = rpn_batch(features, image_shape, gt_bboxes)

    # RCNN
    # Get cls_score in shape of [FASTER_RCNN_MINIBATCH_SIZE, CLS_NUM + 1]
    # Get bbox_pred in shape of [FASTER_RCNN_MINIBATCH_SIZE, 4 * (CLS_NUM + 1)]
    cls_score, bbox_pred = faster_rcnn(features, rois)

    cls_prob = slim.softmax(cls_score)
    cls_categories = tf.cast(tf.argmax(cls_prob, axis=1), dtype=tf.int32)
    # Fraction of rois whose arg-max class equals the assigned label.
    rcnn_cls_acc = tf.reduce_mean(tf.cast(tf.equal(cls_categories, tf.cast(tf.reshape(labels, [-1]), tf.int32)), tf.float32))

    final_bbox_list, final_score_list, final_categories_list = batchwise_process_faster_rcnn(rois, bbox_pred, cls_prob, image_shape)

    rcnn_bbox_loss, rcnn_cls_loss = build_faster_rcnn_losses(bbox_pred, bbox_targets, cls_prob, labels, frc.NUM_CLS + 1)

    # ------------------------------BEGIN SUMMARY--------------------------------
    # Image summary for RPN rois
    class_names = frc.CLS_NAMES + ['circle', 'rectangle', 'triangle']
    display_rois_img = tf.reshape(inputs[0], shape=[frc.IMAGE_SHAPE[0], frc.IMAGE_SHAPE[1], 3])
    with tf.name_scope('rpn_image_summary'):
        # Label 0 is drawn as background, every other label as foreground.
        display_BG_indices = tf.reshape(tf.where(tf.equal(labels[0], 0)), [-1])
        display_FG_indices = tf.reshape(tf.where(tf.not_equal(labels[0], 0)), [-1])

        display_BG_rois = tf.gather(rois[0], display_BG_indices)
        display_FG_rois = tf.gather(rois[0], display_FG_indices)

        display_BG_img = tf.py_func(draw_rectangle, [display_rois_img, display_BG_rois], [tf.uint8])
        display_FG_img = tf.py_func(draw_rectangle, [display_rois_img, display_FG_rois], [tf.uint8])

        tf.summary.image('class_rois/BG', display_BG_img)
        tf.summary.image('class_rois/FG', display_FG_img)

    # Add predicted bbox with confidence 0.25, 0.5, 0.75 and ground truth in image summary.
    with tf.name_scope('rcnn_image_summary'):
        # Only the detections of the first image in the batch are used.
        final_bbox = final_bbox_list[0]
        final_score = final_score_list[0]
        final_categories = final_categories_list[0]

        # Score bands [0.25, 0.5), [0.5, 0.75) and [0.75, inf); category 0 is
        # excluded from all of them.
        display_indices_25 = tf.reshape(tf.where(tf.greater_equal(final_score, 0.25) & tf.less(final_score, 0.5) & tf.not_equal(final_categories, 0)), [-1])
        display_indices_50 = tf.reshape(tf.where(tf.greater_equal(final_score, 0.5) & tf.less(final_score, 0.75) & tf.not_equal(final_categories, 0)), [-1])
        display_indices_75 = tf.reshape(tf.where(tf.greater_equal(final_score, 0.75) & tf.not_equal(final_categories, 0)), [-1])

        display_bboxes_25 = tf.gather(final_bbox, display_indices_25)
        display_bboxes_50 = tf.gather(final_bbox, display_indices_50)
        display_bboxes_75 = tf.gather(final_bbox, display_indices_75)

        display_categories_25 = tf.gather(final_categories, display_indices_25)
        display_categories_50 = tf.gather(final_categories, display_indices_50)
        display_categories_75 = tf.gather(final_categories, display_indices_75)

        # Ground-truth rows whose first column equals 0, without that column.
        show_gt = tf.reshape(tf.gather(gt_bboxes[:, 1:], tf.where(tf.equal(gt_bboxes[:, 0], 0))), [-1, 5])

        display_image_25 = tf.py_func(draw_rectangle_with_name, [display_rois_img, display_bboxes_25, display_categories_25, class_names], [tf.uint8])
        display_image_50 = tf.py_func(draw_rectangle_with_name, [display_rois_img, display_bboxes_50, display_categories_50, class_names], [tf.uint8])
        display_image_75 = tf.py_func(draw_rectangle_with_name, [display_rois_img, display_bboxes_75, display_categories_75, class_names], [tf.uint8])
        display_image_gt = tf.py_func(draw_rectangle_with_name, [display_rois_img, show_gt[:, :-1], show_gt[:, -1], class_names], [tf.uint8])

        tf.summary.image('detection/gt', display_image_gt)
        tf.summary.image('detection/25', display_image_25)
        tf.summary.image('detection/50', display_image_50)
        tf.summary.image('detection/75', display_image_75)
    # -------------------------------END SUMMARY---------------------------------

    loss_dict = {'rpn_cls_loss': rpn_cls_loss,
                 'rpn_bbox_loss': rpn_bbox_loss,
                 'rcnn_cls_loss': rcnn_cls_loss,
                 'rcnn_bbox_loss': rcnn_bbox_loss}
    acc_dict = {'rpn_cls_acc': rpn_cls_acc,
                'rcnn_cls_acc': rcnn_cls_acc}

    return final_bbox, final_score, final_categories, loss_dict, acc_dict
def get_symetric_census(img, kernel_size=(3, 3), index=None, debug=False):
    """Compute a symmetric census signature for every pixel of an image.

    The image is averaged over axis 2, zero padded, and a ``kernel_size``
    window is gathered for every pixel. Each window is compared element-wise
    (``>=``) with its own reversal; the first ``int(c_w * c_h / 2)`` comparison
    bits are packed into a single number divided by ``2**(number of bits)``.

    Args:
        img: Tensor whose static shape is read with ``get_shape()``. A rank-3
            input is averaged over axis 2; for a rank-4 input only ``img[0]``
            is used and then averaged over axis 2. After that step the tensor
            must be rank 2.
        kernel_size: ``(height, width)`` of the census window.
        index: Index applied to the per-pixel patch and census tensors; only
            read when ``debug`` is true.
        debug: When true, intermediate tensors are returned as well.

    Returns:
        A tensor reshaped to ``[img_h, img_w, 1]`` holding the packed census
        values. When ``debug`` is true, the tuple ``(padded image,
        patch_flat[index], pixel_censuses[index], census image)`` instead.
    """
    rank = len(img.get_shape().as_list())
    if rank == 3:
        img = tf.reduce_mean(img, 2)
    elif rank == 4:
        img = tf.reduce_mean(img[0], 2)

    # From here on the image is H x W.
    img_h, img_w = img.get_shape().as_list()
    c_h, c_w = kernel_size
    num_pixels = img_h * img_w
    num_bits = int(c_w * c_h / 2)

    # Per-pixel coordinates of the original image as (H*W, 1) columns.
    # NOTE(review): presumably get_mesh_grid_per_img returns flattened x / y
    # coordinate grids - confirm against its definition.
    x_img_flat, y_img_flat = get_mesh_grid_per_img(img_w, img_h)
    x_img_col = tf.reshape(x_img_flat, [num_pixels, 1])
    y_img_col = tf.reshape(y_img_flat, [num_pixels, 1])

    # Zero pad so that a full window exists around every original pixel.
    p_h, p_w = int(c_h / 2), int(c_w / 2)
    img = tf.pad(img, [[p_h, p_h], [p_w, p_w]])
    pad_img_w = img_w + 2 * p_w

    # Offsets of the window cells, combined with every pixel position into
    # flat indices of the padded image.
    x_kernel_flat, y_kernel_flat = get_mesh_grid_per_img(c_w, c_h)
    rows = y_img_col + y_kernel_flat
    cols = x_img_col + x_kernel_flat
    patch_indices = tf.cast(rows * pad_img_w + cols, tf.int32)

    patch_flat = tf.gather(tf.reshape(img, [-1]), patch_indices)
    mirrored = tf.reverse(patch_flat, [1])

    # Only the first half of the comparisons is kept: the second half compares
    # the same cell pairs in the opposite direction.
    patch_censuses = tf.greater_equal(patch_flat, mirrored)[:, 0:num_bits]

    # Pack the bits into one value per pixel and scale by 2**num_bits.
    bit_values = tf.cast(tf.reverse(tensor=patch_censuses, axis=[1]),
                         dtype=tf.float32)
    bit_weights = 2**tf.range(tf.cast(num_bits, dtype=tf.float32))
    pixel_censuses = tf.reduce_sum(bit_values * bit_weights, 1) / 2**num_bits

    img_censuses = tf.reshape(pixel_censuses, [img_h, img_w, 1])
    if not debug:
        return img_censuses
    return img, patch_flat[index], pixel_censuses[index], img_censuses
def _instance_process(instance_rois, instance_bbox_pred, instance_scores,
                      instance_image_shape):
    """Decode, clip and NMS-filter the per-class predictions of one image.

    For each of the ``frc.NUM_CLS + 1`` class slots the encoded boxes are
    decoded against ``instance_rois``, clipped to the image extent and passed
    through non-max suppression. The survivors of all classes are concatenated
    and then padded or truncated to
    ``frc.FASTER_RCNN_OUTPUT_NUM_PER_IMAGE_IN_BATCH`` entries.

    Args:
        instance_rois: ROIs handed to ``decode_bboxes`` as reference boxes.
        instance_bbox_pred: Encoded box predictions; unstacked along axis 1
            into one tensor per class.
        instance_scores: Scores; unstacked along axis 1 into one tensor per
            class.
        instance_image_shape: Indexable whose entries 0 and 1 are used as image
            height and width.

    Returns:
        Tuple ``(final_bboxes, final_scores, categories)``.
    """
    per_class_deltas = tf.unstack(instance_bbox_pred, axis=1)
    per_class_scores = tf.unstack(instance_scores, axis=1)
    max_out = frc.FASTER_RCNN_OUTPUT_NUM_PER_IMAGE_IN_BATCH

    kept_boxes = []
    kept_scores = []
    kept_labels = []
    for cls_idx in range(frc.NUM_CLS + 1):
        cls_deltas = per_class_deltas[cls_idx]
        cls_scores = per_class_scores[cls_idx]

        boxes = decode_bboxes(cls_deltas, instance_rois,
                              scale_factor=None)  # frc.ROI_SCALE_FACTORS
        x_min, y_min, x_max, y_max = tf.unstack(boxes, axis=1)

        # Clip the coordinates into the real image (excludes zero padding).
        img_h = tf.to_float(instance_image_shape[0])
        img_w = tf.to_float(instance_image_shape[1])
        clipped = tf.stack([
            tf.maximum(0., tf.minimum(img_w - 1, x_min)),
            tf.maximum(0., tf.minimum(img_h - 1, y_min)),
            tf.maximum(0., tf.minimum(img_w - 1, x_max)),
            tf.maximum(0., tf.minimum(img_h - 1, y_max)),
        ], axis=1)

        # Per-class NMS.
        keep = tf.image.non_max_suppression(
            clipped, cls_scores, frc.FASTER_RCNN_NMS_MAX_BOX_PER_CLASS,
            frc.FASTER_RCNN_NMS_IOU_THRESHOLD)
        nms_boxes = tf.gather(clipped, keep)
        nms_scores = tf.gather(cls_scores, keep)

        kept_boxes.append(nms_boxes)
        kept_scores.append(nms_scores)
        kept_labels.append(cls_idx * tf.ones_like(nms_scores, dtype=tf.int32))

    all_boxes = tf.reshape(tf.concat(kept_boxes, axis=0), [-1, 4])
    all_scores = tf.reshape(tf.concat(kept_scores, axis=0), [-1])
    all_labels = tf.reshape(tf.concat(kept_labels, axis=0), [-1])

    def _pad():
        return _padding_output(all_boxes, all_scores, all_labels)

    def _truncate():
        return (all_boxes[:max_out], all_scores[:max_out],
                all_labels[:max_out])

    # Pad when there are at most `max_out` detections; otherwise keep the
    # first `max_out` (random choice is not available).
    fits = tf.greater_equal(max_out, tf.shape(all_labels)[0])
    final_bboxes, final_scores, categories = tf.cond(
        fits, true_fn=_pad, false_fn=_truncate)
    return final_bboxes, final_scores, categories
def get_train_ops(loss,
                  tf_variables,
                  train_step,
                  clip_mode=None,
                  grad_bound=None,
                  l2_reg=1e-4,
                  lr_warmup_val=None,
                  lr_warmup_steps=100,
                  lr_init=0.1,
                  lr_dec_start=0,
                  lr_dec_every=10000,
                  lr_dec_rate=0.1,
                  lr_dec_min=None,
                  lr_cosine=False,
                  lr_max=None,
                  lr_min=None,
                  lr_T_0=None,
                  lr_T_mul=None,
                  num_train_batches=None,
                  optim_algo=None,
                  sync_replicas=False,
                  num_aggregate=None,
                  num_replicas=None,
                  get_grad_norms=False,
                  moving_average=None):
    """Build the gradient, learning-rate and optimizer ops for training.

    Args:
        loss: Scalar loss tensor to minimize.
        tf_variables: Variables to differentiate with respect to and update.
        train_step: Global step tensor; drives the learning-rate schedule and
            is passed to ``apply_gradients`` as ``global_step``.
        clip_mode: "global" (clip by global norm), "norm" (clip every gradient
            by its own norm), or None for no clipping.
        grad_bound: Clipping bound; required when ``clip_mode`` is set.
        l2_reg: Weight of the sum-of-squares penalty over ``tf_variables``;
            skipped when not positive.
        lr_warmup_val: If not None, learning rate used while
            ``train_step < lr_warmup_steps``.
        lr_warmup_steps: Number of warm-up steps.
        lr_init, lr_dec_start, lr_dec_every, lr_dec_rate: Parameters of the
            staircase exponential decay used when ``lr_cosine`` is false.
        lr_dec_min: Optional lower bound for the exponentially decayed rate.
        lr_cosine: Use a cosine schedule with restarts instead.
        lr_max, lr_min, lr_T_0, lr_T_mul: Cosine schedule bounds, initial
            period (in epochs) and period multiplier; required for
            ``lr_cosine``.
        num_train_batches: Steps per epoch; required for ``lr_cosine``.
        optim_algo: "momentum", "sgd" or "adam".
        sync_replicas: Wrap the optimizer in ``SyncReplicasOptimizer``.
        num_aggregate, num_replicas: Required when ``sync_replicas`` is true.
        get_grad_norms: Also return the per-variable gradient norms.
        moving_average: If not None, decay of a ``MovingAverageOptimizer``
            wrapped around the optimizer.

    Returns:
        ``(train_op, learning_rate, grad_norm, opt)``, followed by a dict
        mapping variable names to gradient norms when ``get_grad_norms`` is
        true.

    Raises:
        NotImplementedError: For an unknown ``clip_mode``.
        ValueError: For an unknown ``optim_algo``.
    """
    if l2_reg > 0:
        l2_loss = tf.add_n([tf.reduce_sum(var**2) for var in tf_variables])
        loss += l2_reg * l2_loss

    grads = tf.gradients(loss, tf_variables)
    # Norms are measured before clipping.
    grad_norm = tf.linalg.global_norm(grads)

    grad_norms = {}
    for v, g in zip(tf_variables, grads):
        if v is None or g is None:
            continue
        values = g.values if isinstance(g, tf.IndexedSlices) else g
        grad_norms[v.name] = tf.sqrt(tf.reduce_sum(values**2))

    if clip_mode is not None:
        assert grad_bound is not None, "Need grad_bound to clip gradients."
        if clip_mode == "global":
            grads, _ = tf.clip_by_global_norm(grads, grad_bound)
        elif clip_mode == "norm":
            clipped = []
            for g in grads:
                if g is None:
                    # Variable without a gradient: nothing to clip.
                    c_g = None
                elif isinstance(g, tf.IndexedSlices):
                    # IndexedSlices takes (values, indices, dense_shape).
                    c_g = tf.IndexedSlices(
                        tf.clip_by_norm(g.values, grad_bound), g.indices,
                        g.dense_shape)
                else:
                    c_g = tf.clip_by_norm(g, grad_bound)
                # Keep the clipped gradient, not the raw one.
                clipped.append(c_g)
            grads = clipped
        else:
            raise NotImplementedError("Unknown clip_mode {}".format(clip_mode))

    if lr_cosine:
        assert lr_max is not None, "Need lr_max to use lr_cosine"
        assert lr_min is not None, "Need lr_min to use lr_cosine"
        assert lr_T_0 is not None, "Need lr_T_0 to use lr_cosine"
        assert lr_T_mul is not None, "Need lr_T_mul to use lr_cosine"
        assert num_train_batches is not None, ("Need num_train_batches to use"
                                               " lr_cosine")

        curr_epoch = train_step // num_train_batches

        last_reset = tf.Variable(0,
                                 dtype=tf.int32,
                                 trainable=False,
                                 name="last_reset")
        T_i = tf.Variable(lr_T_0, dtype=tf.int32, trainable=False, name="T_i")
        T_curr = curr_epoch - last_reset

        def _cosine_lr():
            rate = tf.cast(T_curr, tf.float32) / tf.cast(
                T_i, tf.float32) * 3.1415926
            return lr_min + 0.5 * (lr_max - lr_min) * (1.0 + tf.cos(rate))

        def _update():
            # Period finished: restart and stretch the next period.
            update_last_reset = tf.compat.v1.assign(last_reset,
                                                    curr_epoch,
                                                    use_locking=True)
            update_T_i = tf.compat.v1.assign(T_i,
                                             T_i * lr_T_mul,
                                             use_locking=True)
            with tf.control_dependencies([update_last_reset, update_T_i]):
                return _cosine_lr()

        learning_rate = tf.cond(tf.greater_equal(T_curr, T_i), _update,
                                _cosine_lr)
    else:
        learning_rate = tf.compat.v1.train.exponential_decay(
            lr_init,
            tf.maximum(train_step - lr_dec_start, 0),
            lr_dec_every,
            lr_dec_rate,
            staircase=True)
        if lr_dec_min is not None:
            learning_rate = tf.maximum(learning_rate, lr_dec_min)

    if lr_warmup_val is not None:
        learning_rate = tf.cond(tf.less(train_step, lr_warmup_steps),
                                lambda: lr_warmup_val, lambda: learning_rate)

    if optim_algo == "momentum":
        opt = tf.compat.v1.train.MomentumOptimizer(learning_rate,
                                                   0.9,
                                                   use_locking=True,
                                                   use_nesterov=True)
    elif optim_algo == "sgd":
        opt = tf.compat.v1.train.GradientDescentOptimizer(learning_rate,
                                                          use_locking=True)
    elif optim_algo == "adam":
        opt = tf.compat.v1.train.AdamOptimizer(learning_rate,
                                               beta1=0.0,
                                               epsilon=1e-3,
                                               use_locking=True)
    else:
        raise ValueError("Unknown optim_algo {}".format(optim_algo))

    if sync_replicas:
        assert num_aggregate is not None, "Need num_aggregate to sync."
        assert num_replicas is not None, "Need num_replicas to sync."
        # Same compat.v1 namespace as the other optimizers in this function.
        opt = tf.compat.v1.train.SyncReplicasOptimizer(
            opt,
            replicas_to_aggregate=num_aggregate,
            total_num_replicas=num_replicas,
            use_locking=True)

    if moving_average is not None:
        # NOTE: tf.contrib is only available in TensorFlow 1.x.
        opt = tf.contrib.opt.MovingAverageOptimizer(
            opt, average_decay=moving_average)

    train_op = opt.apply_gradients(zip(grads, tf_variables),
                                   global_step=train_step)

    if get_grad_norms:
        return train_op, learning_rate, grad_norm, opt, grad_norms
    return train_op, learning_rate, grad_norm, opt
def create_network(opt):
    """Build the two-stage (blur / sharp) GAN training graph.

    A single placeholder carries three images concatenated along the width
    axis. Generator "generatorB" maps ``input_A`` to ``fake_blur_B`` and
    generator "generatorS" maps ``fake_blur_B`` to ``fake_B``. Three
    discriminators ("discriminatorB", "discriminatorS", "discriminatorT")
    score the blur stage, the sharp stage and the end-to-end mapping. Every
    stage has its own global step and linearly decayed learning rate.

    Args:
        opt: Options object. ``height``, ``width``, ``in_depth`` and
            ``lambda_A`` are read here; the object is also forwarded to
            ``generator`` and ``discriminator``.

    Returns:
        A ``Model`` namedtuple with the placeholders, generated images,
        learning rates, the six solver ops and the loss summaries.
    """
    # parameter
    height = opt.height
    width = opt.width
    in_depth = opt.in_depth
    lambda_A = opt.lambda_A
    EPS = 1e-12
    starter_learning_rate = 0.0002
    end_learning_rate = 0.0
    start_decay_step = 200000
    decay_steps = 50000
    beta1 = 0.5
    global_step_B = tf.Variable(0, trainable=False)  # for blur generator
    global_step_S = tf.Variable(0, trainable=False)  # for sharp generator
    global_step_T = tf.Variable(0, trainable=False)  # for total discriminator

    Model = collections.namedtuple("Model", [
        'global_step_T', 'learning_rate_B', 'learning_rate_S',
        'learning_rate_T', 'data', 'is_training', 'input_A', 'input_B',
        'fake_blur_B', 'fake_B', 'd_B_solver', 'g_B_solver', 'd_S_solver',
        'g_S_solver', 'd_T_solver', 'g_T_solver', 'g_B_loss_L1_summary',
        'g_B_loss_GAN_summary', 'd_B_loss_sum', 'g_S_loss_L1_summary',
        'g_S_loss_GAN_summary', 'd_S_loss_sum', 'g_T_loss_L1_summary',
        'g_T_loss_GAN_summary', 'd_T_loss_sum'
    ])

    # placeholder/input
    data = tf.placeholder(tf.float32, [None, height, width * 3, in_depth],
                          name="data_AB")
    is_training = tf.placeholder(tf.bool, name="is_training")
    # Split the placeholder into three full-width images. The middle slice
    # must end at 2 * width (exclusive) so that no column is dropped.
    # NOTE(review): presumably `transform` resizes to width + 10 and crops back
    # to width - confirm against its definition.
    input_B, input_A, blur_B = transform(data[:, :, :width, :],
                                         data[:, :, width:width * 2, :],
                                         data[:, :, width * 2:, :],
                                         width + 10, width)

    # generator
    with tf.variable_scope("generatorB"):  # blur generator
        fake_blur_B = generator(input_A, is_training, opt)
    with tf.variable_scope("generatorS"):  # sharp generator
        fake_B = generator(fake_blur_B, is_training, opt)

    # discriminator
    d_B_real = discriminator(input_A, blur_B, opt, update_collection=None,
                             name="discriminatorB")
    d_B_fake = discriminator(input_A, fake_blur_B, opt,
                             update_collection="NO_OPS",
                             name="discriminatorB", reuse=True)
    d_S_real = discriminator(blur_B, input_B, opt, update_collection=None,
                             name="discriminatorS")
    d_S_fake = discriminator(blur_B, fake_B, opt, update_collection="NO_OPS",
                             name="discriminatorS", reuse=True)
    d_T_real = discriminator(input_A, input_B, opt, update_collection=None,
                             name="discriminatorT")
    d_T_fake = discriminator(input_A, fake_B, opt, update_collection="NO_OPS",
                             name="discriminatorT", reuse=True)

    # loss
    with tf.variable_scope("discriminator_loss"):
        d_B_loss = tf.reduce_mean(
            -(tf.log(d_B_real + EPS) + tf.log(1 - d_B_fake + EPS)))
        d_S_loss = tf.reduce_mean(
            -(tf.log(d_S_real + EPS) + tf.log(1 - d_S_fake + EPS)))
        d_T_loss = tf.reduce_mean(
            -(tf.log(d_T_real + EPS) + tf.log(1 - d_T_fake + EPS)))
    with tf.variable_scope("generator_loss"):
        g_B_loss_GAN = tf.reduce_mean(-tf.log(d_B_fake + EPS))
        g_B_loss_L1 = tf.reduce_mean(tf.abs(blur_B - fake_blur_B))
        g_B_loss = g_B_loss_GAN + g_B_loss_L1 * lambda_A
        g_S_loss_GAN = tf.reduce_mean(-tf.log(d_S_fake + EPS))
        g_S_loss_L1 = tf.reduce_mean(tf.abs(input_B - fake_B))
        g_S_loss = g_S_loss_GAN + g_S_loss_L1 * lambda_A
        g_T_loss_GAN = tf.reduce_mean(-tf.log(d_T_fake + EPS))
        g_T_loss_L1 = tf.reduce_mean(tf.abs(input_B - fake_B))
        g_T_loss = g_T_loss_GAN + g_T_loss_L1 * lambda_A

    # tensorboard summary
    g_B_loss_L1_summary = tf.summary.scalar("g_B_loss_L1", g_B_loss_L1)
    g_B_loss_GAN_summary = tf.summary.scalar("g_B_loss_GAN", g_B_loss_GAN)
    d_B_loss_sum = tf.summary.scalar("d_B_loss", d_B_loss)
    g_S_loss_L1_summary = tf.summary.scalar("g_S_loss_L1", g_S_loss_L1)
    g_S_loss_GAN_summary = tf.summary.scalar("g_S_loss_GAN", g_S_loss_GAN)
    d_S_loss_sum = tf.summary.scalar("d_S_loss", d_S_loss)
    g_T_loss_L1_summary = tf.summary.scalar("g_T_loss_L1", g_T_loss_L1)
    g_T_loss_GAN_summary = tf.summary.scalar("g_T_loss_GAN", g_T_loss_GAN)
    d_T_loss_sum = tf.summary.scalar("d_T_loss", d_T_loss)

    # optimizer
    def _decayed_lr(step):
        # Constant until `start_decay_step`, then linear decay to
        # `end_learning_rate` over `decay_steps` steps.
        return tf.where(
            tf.greater_equal(step, start_decay_step),
            tf.train.polynomial_decay(starter_learning_rate,
                                      step - start_decay_step,
                                      decay_steps,
                                      end_learning_rate,
                                      power=1.0), starter_learning_rate)

    learning_rate_B = _decayed_lr(global_step_B)
    learning_rate_S = _decayed_lr(global_step_S)
    learning_rate_T = _decayed_lr(global_step_T)

    trainable_variables_DB = tf.get_collection(
        tf.GraphKeys.TRAINABLE_VARIABLES, scope='discriminatorB')
    trainable_variables_GB = tf.get_collection(
        tf.GraphKeys.TRAINABLE_VARIABLES, scope='generatorB')
    trainable_variables_DS = tf.get_collection(
        tf.GraphKeys.TRAINABLE_VARIABLES, scope='discriminatorS')
    trainable_variables_GS = tf.get_collection(
        tf.GraphKeys.TRAINABLE_VARIABLES, scope='generatorS')
    trainable_variables_DT = tf.get_collection(
        tf.GraphKeys.TRAINABLE_VARIABLES, scope='discriminatorT')
    trainable_variables_GT = trainable_variables_GB + trainable_variables_GS

    # Every stage trains with the learning rate driven by its own global
    # step, so the rates exposed on the Model are the ones actually applied.
    d_B_solver = tf.train.AdamOptimizer(learning_rate_B, beta1).minimize(
        d_B_loss, global_step=global_step_B, var_list=trainable_variables_DB)
    g_B_solver = tf.train.AdamOptimizer(learning_rate_B, beta1).minimize(
        g_B_loss, var_list=trainable_variables_GB)
    d_S_solver = tf.train.AdamOptimizer(learning_rate_S, beta1).minimize(
        d_S_loss, global_step=global_step_S, var_list=trainable_variables_DS)
    g_S_solver = tf.train.AdamOptimizer(learning_rate_S, beta1).minimize(
        g_S_loss, var_list=trainable_variables_GS)
    d_T_solver = tf.train.AdamOptimizer(learning_rate_T, beta1).minimize(
        d_T_loss, global_step=global_step_T, var_list=trainable_variables_DT)
    g_T_solver = tf.train.AdamOptimizer(learning_rate_T, beta1).minimize(
        g_T_loss, var_list=trainable_variables_GT)

    return Model(global_step_T=global_step_T,
                 input_A=input_A,
                 input_B=input_B,
                 learning_rate_B=learning_rate_B,
                 learning_rate_S=learning_rate_S,
                 learning_rate_T=learning_rate_T,
                 is_training=is_training,
                 data=data,
                 fake_blur_B=fake_blur_B,
                 fake_B=fake_B,
                 d_B_solver=d_B_solver,
                 g_B_solver=g_B_solver,
                 d_S_solver=d_S_solver,
                 g_S_solver=g_S_solver,
                 d_T_solver=d_T_solver,
                 g_T_solver=g_T_solver,
                 g_B_loss_L1_summary=g_B_loss_L1_summary,
                 g_B_loss_GAN_summary=g_B_loss_GAN_summary,
                 d_B_loss_sum=d_B_loss_sum,
                 g_S_loss_L1_summary=g_S_loss_L1_summary,
                 g_S_loss_GAN_summary=g_S_loss_GAN_summary,
                 d_S_loss_sum=d_S_loss_sum,
                 g_T_loss_L1_summary=g_T_loss_L1_summary,
                 g_T_loss_GAN_summary=g_T_loss_GAN_summary,
                 d_T_loss_sum=d_T_loss_sum)
def _build_graph(self):
    """Assemble the detection graph for training or inference.

    Seven feature maps are produced by ``self._feature_extractor``; the first
    one is L2-normalized and rescaled by a learned factor. A convolution per
    feature map yields the raw predictions, which are decoded with
    ``self._get_pbbox`` and paired with anchors from ``self._get_abbox``.

    In 'train' mode the per-image losses are summed in a ``tf.while_loop`` and
    ``self.loss`` / ``self.train_op`` are set. In any other mode the first
    image of the batch is decoded, filtered by score and per-class NMS, and
    ``self.detection_pred`` is set to ``[scores, bbox, class_id]``.
    """
    channels_last = self.data_format == 'channels_last'

    with tf.variable_scope('feature_extractor'):
        feat1, feat2, feat3, feat4, feat5, feat6, feat7 = (
            self._feature_extractor(self.images))
        axes = 3 if channels_last else 1
        feat1 = tf.nn.l2_normalize(feat1, axis=axes)
        channels = tf.shape(feat1)[axes]
        norm_factor = tf.get_variable(
            'l2_norm_factor',
            shape=[1],
            initializer=tf.constant_initializer(20.))
        norm_factor = tf.tile(norm_factor, [channels])
        # Shape the factor so that it broadcasts along the channel axis.
        broadcast_shape = [1, 1, 1, -1] if channels_last else [1, -1, 1, 1]
        feat1 = tf.reshape(norm_factor, broadcast_shape) * feat1
    feats = [feat1, feat2, feat3, feat4, feat5, feat6, feat7]

    with tf.variable_scope('regressor'):
        # Multiplier of (num_classes + 4) output channels per feature map.
        boxes_per_cell = (4, 6, 6, 6, 6, 4, 4)
        preds = [
            self._conv_layer(feat, n_boxes * (self.num_classes + 4), 3, 1,
                             'pred%d' % level)
            for level, (feat, n_boxes) in enumerate(
                zip(feats, boxes_per_cell), start=1)
        ]
        if self.data_format == 'channels_first':
            preds = [tf.transpose(pred, [0, 2, 3, 1]) for pred in preds]
        pshapes = [tf.shape(pred) for pred in preds]

    with tf.variable_scope('inference'):
        decoded = [self._get_pbbox(pred) for pred in preds]
        yx_parts, hw_parts, conf_parts = zip(*decoded)

        # Anchor scales: each level gets [s_k, sqrt(s_k * s_{k+1})].
        base = [0.07 * self.input_size]
        base = base + [(0.15 + (0.9 - 0.15) / 5 * (k - 1)) * self.input_size
                       for k in range(1, 8)]
        scales = [[cur, (cur * nxt)**0.5]
                  for cur, nxt in zip(base[:-1], base[1:])]

        two_ratios = (2, 1 / 2)
        four_ratios = (2, 1 / 2, 3, 1 / 3)
        ratios = (two_ratios, four_ratios, four_ratios, four_ratios,
                  four_ratios, two_ratios, two_ratios)
        anchors = [
            self._get_abbox(scale, list(ratio), pshape)
            for scale, ratio, pshape in zip(scales, ratios, pshapes)
        ]
        y1x1_parts, y2x2_parts, ayx_parts, ahw_parts = zip(*anchors)

        pbbox_yx = tf.concat(list(yx_parts), axis=1)
        pbbox_hw = tf.concat(list(hw_parts), axis=1)
        pconf = tf.concat(list(conf_parts), axis=1)
        abbox_y1x1 = tf.concat(list(y1x1_parts), axis=0)
        abbox_y2x2 = tf.concat(list(y2x2_parts), axis=0)
        abbox_yx = tf.concat(list(ayx_parts), axis=0)
        abbox_hw = tf.concat(list(ahw_parts), axis=0)

        if self.mode == 'train':

            def _keep_going(loss, i):
                return tf.less(i, tf.cast(self.batch_size, tf.float32))

            def _accumulate(loss, i):
                # The loop counter is a float; cast it to index the batch.
                def _pick(batched):
                    return tf.squeeze(
                        tf.gather(batched, tf.cast(i, tf.int32)))

                image_loss = self._compute_one_image_loss(
                    _pick(pbbox_yx),
                    _pick(pbbox_hw),
                    abbox_y1x1,
                    abbox_y2x2,
                    abbox_yx,
                    abbox_hw,
                    _pick(pconf),
                    _pick(self.ground_truth),
                )
                return tf.add(loss, image_loss), tf.add(i, 1.)

            total_loss, _ = tf.while_loop(_keep_going, _accumulate, (0., 0.))
            total_loss = total_loss / self.batch_size

            optimizer = tf.train.MomentumOptimizer(learning_rate=self.lr,
                                                   momentum=.9)
            l2_penalty = tf.add_n(
                [tf.nn.l2_loss(var) for var in tf.trainable_variables()])
            self.loss = total_loss + self.weight_decay * l2_penalty
            update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
            train_op = optimizer.minimize(self.loss,
                                          global_step=self.global_step)
            self.train_op = tf.group([update_ops, train_op])
        else:
            # Only the first image of the batch is decoded.
            num_kept_classes = self.num_classes - 1
            pbbox_yxt = pbbox_yx[0, ...]
            pbbox_hwt = pbbox_hw[0, ...]
            confidence = tf.nn.softmax(pconf[0, ...])
            class_id = tf.argmax(confidence, axis=-1)
            # Keep rows whose arg-max class index is below num_classes - 1.
            # NOTE(review): presumably the last index is the background
            # class - confirm.
            conf_mask = tf.less(class_id, num_kept_classes)
            pbbox_yxt = tf.boolean_mask(pbbox_yxt, conf_mask)
            pbbox_hwt = tf.boolean_mask(pbbox_hwt, conf_mask)
            confidence = tf.boolean_mask(confidence,
                                         conf_mask)[:, :num_kept_classes]
            abbox_yxt = tf.boolean_mask(abbox_yx, conf_mask)
            abbox_hwt = tf.boolean_mask(abbox_hw, conf_mask)

            # Apply the predicted offsets to the anchors, then convert
            # center/size boxes to corner form.
            centers = pbbox_yxt * abbox_hwt + abbox_yxt
            sizes = abbox_hwt * tf.exp(pbbox_hwt)
            top_left = centers - sizes / 2.
            bottom_right = centers + sizes / 2.
            corners = tf.concat([top_left, bottom_right], axis=-1)

            filter_mask = tf.greater_equal(confidence,
                                           self.nms_score_threshold)
            scores = []
            class_id = []
            bbox = []
            for cls in range(num_kept_classes):
                cls_scores = tf.boolean_mask(confidence[:, cls],
                                             filter_mask[:, cls])
                cls_boxes = tf.boolean_mask(corners, filter_mask[:, cls])
                selected = tf.image.non_max_suppression(
                    cls_boxes,
                    cls_scores,
                    self.nms_max_boxes,
                    self.nms_iou_threshold,
                )
                nms_scores = tf.gather(cls_scores, selected)
                scores.append(nms_scores)
                bbox.append(tf.gather(cls_boxes, selected))
                class_id.append(tf.ones_like(nms_scores, tf.int32) * cls)
            bbox = tf.concat(bbox, axis=0)
            scores = tf.concat(scores, axis=0)
            class_id = tf.concat(class_id, axis=0)
            self.detection_pred = [scores, bbox, class_id]
def _build(self, proposals, bbox_pred, cls_prob, im_shape):
    """Turn RCNN outputs into final detections.

    For every class the proposals are decoded with that class' deltas,
    clipped to the image, filtered by minimum probability and positive area,
    and reduced with NMS. The per-class results are concatenated and the
    ``self._total_max_detections`` most probable ones are kept.

    Args:
        proposals: Tensor with the RPN proposals bounding boxes.
            Shape (num_proposals, 4), where num_proposals is not known
            beforehand.
        bbox_pred: Tensor with the RCNN delta predictions for each proposal
            for each class. Shape (num_proposals, 4 * num_classes).
        cls_prob: Softmax probability for each proposal, where index 0 is the
            background class (ignored here).
            Shape (num_proposals, num_classes + 1).
        im_shape: Image shape passed to ``clip_boxes``.

    Returns:
        Dict with:
            "objects": top-k boxes, each (x_min, y_min, x_max, y_max).
            "proposal_label": class index of every top-k box.
            "proposal_label_prob": probability of every top-k box.
            "selected_boxes", "selected_probs", "selected_labels": lists with
                one tensor per class holding the post-NMS results before the
                top-k selection.
    """
    selected_boxes = []
    selected_probs = []
    selected_labels = []

    for class_id in range(self._num_classes):
        # Column 0 of `cls_prob` is background, hence the +1.
        class_prob = cls_prob[:, class_id + 1]
        first_col = 4 * class_id
        class_bboxes = bbox_pred[:, first_col:first_col + 4]

        # Apply the class-specific deltas and keep boxes inside the image.
        decoded = decode(
            proposals,
            class_bboxes,
            variances=self._variances,
        )
        class_objects = clip_boxes(decoded, im_shape)

        # Keep boxes that are probable enough and have a non-empty area.
        prob_filter = tf.greater_equal(class_prob, self._min_prob_threshold)
        x_min, y_min, x_max, y_max = tf.unstack(class_objects, axis=1)
        box_width = tf.maximum(x_max - x_min, 0.0)
        box_height = tf.maximum(y_max - y_min, 0.0)
        area_filter = tf.greater(box_width * box_height, 0.0)
        object_filter = tf.logical_and(area_filter, prob_filter)

        class_objects = tf.boolean_mask(class_objects, object_filter)
        class_prob = tf.boolean_mask(class_prob, object_filter)

        # TensorFlow's NMS expects its own coordinate order.
        class_objects_tf = change_order(class_objects)
        class_selected_idx = tf.image.non_max_suppression(
            class_objects_tf,
            class_prob,
            self._class_max_detections,
            iou_threshold=self._class_nms_threshold,
        )
        class_objects_tf = tf.gather(class_objects_tf, class_selected_idx)
        class_prob = tf.gather(class_prob, class_selected_idx)
        # Back to this module's coordinate order.
        class_objects = change_order(class_objects_tf)

        selected_boxes.append(class_objects)
        selected_probs.append(class_prob)
        # One label entry per index returned by the NMS.
        num_selected = tf.shape(class_selected_idx)[0]
        selected_labels.append(tf.tile([class_id], [num_selected]))

    # Stack the per-class results row-wise.
    objects = tf.concat(selected_boxes, axis=0)
    proposal_label = tf.concat(selected_labels, axis=0)
    proposal_label_prob = tf.concat(selected_probs, axis=0)

    tf.summary.histogram("proposal_cls_scores", proposal_label_prob, ["rcnn"])

    # Top-k over all classes; k is capped by the number of detections.
    k = tf.minimum(self._total_max_detections,
                   tf.shape(proposal_label_prob)[0])
    top_k = tf.nn.top_k(proposal_label_prob, k=k)

    return {
        "objects": tf.gather(objects, top_k.indices),
        "proposal_label": tf.gather(proposal_label, top_k.indices),
        "proposal_label_prob": top_k.values,
        "selected_boxes": selected_boxes,
        "selected_probs": selected_probs,
        "selected_labels": selected_labels,
    }
def build_graph(
        hdr,  # [b, h, w, c]
        crf,  # [b, k]
        t,  # [b]
        is_training,
):
    """Build the training graph of the dequantization network.

    The HDR input is scaled by the exposure ``t``, corrupted with
    signal-dependent and constant noise, clipped, mapped through the camera
    response function, quantized to 8 bit and JPEG compressed. The network is
    trained to recover the un-quantized LDR image from the JPEG image.

    Args:
        hdr: HDR batch, [b, h, w, c].
        crf: Camera response function per sample, [b, k].
        t: Exposure per sample, [b].
        is_training: Passed to ``Dequantization_net`` as ``is_train``.

    Returns:
        ``(loss, train_op, psnr, psnr_no_q)`` where ``psnr`` compares the
        prediction with the LDR target and ``psnr_no_q`` compares the JPEG
        input with the LDR target.
    """
    # The unpacking also checks the rank of every input.
    b, _, _, _ = get_tensor_shape(hdr)
    b, _ = get_tensor_shape(crf)
    b, = get_tensor_shape(t)

    _hdr_t = hdr * tf.reshape(t, [b, 1, 1, 1])

    # Augment Poisson and Gaussian noise
    sigma_s = 0.08 / 6 * tf.random_uniform([tf.shape(_hdr_t)[0], 1, 1, 3],
                                           minval=0.0,
                                           maxval=1.0,
                                           dtype=tf.float32,
                                           seed=1)
    sigma_c = 0.005 * tf.random_uniform([tf.shape(_hdr_t)[0], 1, 1, 3],
                                        minval=0.0,
                                        maxval=1.0,
                                        dtype=tf.float32,
                                        seed=1)
    noise_s_map = sigma_s * _hdr_t
    noise_s = tf.random_normal(shape=tf.shape(_hdr_t), seed=1) * noise_s_map
    temp_x = _hdr_t + noise_s
    noise_c = sigma_c * tf.random_normal(shape=tf.shape(_hdr_t), seed=1)
    temp_x = temp_x + noise_c
    _hdr_t = tf.nn.relu(temp_x)

    # Dynamic range clipping
    clipped_hdr_t = _clip(_hdr_t)

    # Camera response function
    ldr = apply_rf(clipped_hdr_t, crf)

    # Quantization and JPEG compression
    quantized_hdr = tf.round(ldr * 255.0)
    quantized_hdr_8bit = tf.cast(quantized_hdr, tf.uint8)

    # JPEG quality ramps from 90 to 100 across the batch. The divisor is
    # clamped so that a batch of one does not divide by zero (quality 90).
    quality_steps = max(ARGS.batch_size - 1, 1)
    jpeg_img_list = []
    for i in range(ARGS.batch_size):
        quality = int(round(float(i) / float(quality_steps) * 10.0 + 90.0))
        jpeg_img_list.append(
            tf.image.adjust_jpeg_quality(quantized_hdr_8bit[i], quality))
    jpeg_img = tf.stack(jpeg_img_list, 0)
    jpeg_img_float = tf.cast(jpeg_img, tf.float32) / 255.0
    jpeg_img_float.set_shape([None, 256, 256, 3])

    # loss mask to exclude over-/under-exposed regions: a sample is dropped
    # when more than half of its 256 x 256 pixels are saturated.
    gray = tf.image.rgb_to_grayscale(jpeg_img)
    half_pixels = 256.0 * 256.0 * 0.5
    over_exposed = tf.cast(tf.greater_equal(gray, 249), tf.float32)
    over_exposed = tf.reduce_sum(over_exposed, axis=[1, 2], keepdims=True)
    over_exposed = tf.greater(over_exposed, half_pixels)
    under_exposed = tf.cast(tf.less_equal(gray, 6), tf.float32)
    under_exposed = tf.reduce_sum(under_exposed, axis=[1, 2], keepdims=True)
    under_exposed = tf.greater(under_exposed, half_pixels)
    extreme_cases = tf.logical_or(over_exposed, under_exposed)
    loss_mask = tf.cast(tf.logical_not(extreme_cases), tf.float32)

    with tf.variable_scope("Dequantization_Net"):
        model = Dequantization_net(is_train=is_training)
        pred = _clip(model.inference(jpeg_img_float))

    loss = get_l2_loss_with_mask(pred, ldr)

    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(update_ops):
        train_op = tf.train.AdamOptimizer(learning_rate=1e-4).minimize(
            tf.reduce_mean(loss * loss_mask))

    mse = tf.reduce_mean((pred - ldr)**2)
    psnr = 20.0 * log10(1.0) - 10.0 * log10(mse)
    mse = tf.reduce_mean((jpeg_img_float - ldr)**2)
    psnr_no_q = 20.0 * log10(1.0) - 10.0 * log10(mse)

    tf.summary.scalar('loss', tf.reduce_mean(loss))
    tf.summary.image('ldr', ldr)
    tf.summary.image('jpeg_img_float', jpeg_img_float)
    tf.summary.image('pred', pred)
    # Log the mask of up to the first three samples; never index past the
    # batch.
    for i in range(min(3, ARGS.batch_size)):
        tf.summary.scalar('loss_mask %d' % i, tf.squeeze(loss_mask[i]))

    return loss, train_op, psnr, psnr_no_q
def __init__(self, conf, tasksconf, dataconf, modelconf, evaluatorconf,
             expdir, init_filename, task_index):
    """
    MultiTaskTrainer constructor, creates the training graph

    Args:
        conf: the trainer config
        tasksconf: the config file for each task
        dataconf: the data configuration as a ConfigParser
        modelconf: the neural net model configuration
        evaluatorconf: the evaluator configuration for evaluating
            if None no evaluation will be done
        expdir: directory where the summaries will be written
        init_filename: filename of the network that should be used to
            initialize the model. Put to None if no network is
            available/wanted.
        task_index: optional index of the worker task in the cluster
    """

    self.expdir = expdir
    self.conf = conf
    self.tasksconf = tasksconf
    self.task_index = task_index
    self.init_filename = init_filename

    self.batch_size = int(conf['batch_size'])
    self.tasks = self.conf['tasks'].split(' ')

    # create the graph
    self.graph = tf.Graph()

    # create the models and store them (pickled) in the experiment directory
    modelfile = os.path.join(expdir, 'model', 'model.pkl')
    model_names = modelconf.get('hyper', 'model_names').split(' ')
    self.models = dict()
    with open(modelfile, 'wb') as fid:
        for model_name in model_names:
            self.models[model_name] = model_factory.factory(
                modelconf.get(model_name, 'architecture'))(
                    conf=dict(modelconf.items(model_name)),
                    name=model_name)
        Pickle.dump(self.models, fid)

    evaltype = evaluatorconf.get('evaluator', 'evaluator')

    # define a trainer per traintask
    self.task_trainers = []
    for task in self.tasks:
        taskconf = self.tasksconf[task]
        task_trainer = task_trainer_script.TaskTrainer(
            task, conf, taskconf, self.models, modelconf, dataconf,
            evaluatorconf, self.batch_size)
        self.task_trainers.append(task_trainer)

    num_replicas = 1
    # device = tf.DeviceSpec(job='local')

    self.is_chief = task_index == 0

    # define the placeholders in the graph
    with self.graph.as_default():

        # create a local num_steps variable
        self.num_steps = tf.get_variable(
            name='num_steps',
            shape=[],
            dtype=tf.int32,
            initializer=tf.constant_initializer(0),
            trainable=False)

        # a variable to hold the amount of steps already taken
        self.global_step = tf.get_variable(
            name='global_step',
            shape=[],
            dtype=tf.int32,
            initializer=tf.constant_initializer(0),
            trainable=False)

        should_terminate = tf.get_variable(
            name='should_terminate',
            shape=[],
            dtype=tf.bool,
            initializer=tf.constant_initializer(False),
            trainable=False)

        self.should_save_final_model = tf.get_variable(
            name='should_save_final_model',
            shape=[],
            dtype=tf.bool,
            initializer=tf.constant_initializer(True),
            trainable=False)

        self.dont_save_final_model = self.should_save_final_model.assign(
            False).op

        self.terminate = should_terminate.assign(True).op

        # create a check if training should continue
        self.should_stop = tf.logical_or(
            tf.greater_equal(self.global_step, self.num_steps),
            should_terminate)

        # with tf.device(device):
        num_steps = []
        done_ops = []

        # set the dataqueues for each trainer
        for task_trainer in self.task_trainers:
            task_num_steps, task_done_ops = task_trainer.set_dataqueues()
            num_steps.append(task_num_steps)
            done_ops += task_done_ops

        # training runs for the smallest number of steps over all tasks
        self.set_num_steps = self.num_steps.assign(min(num_steps)).op
        self.done = tf.group(*done_ops)

        # training part
        with tf.variable_scope('train'):

            # a variable to scale the learning rate (used to reduce the
            # learning rate in case validation performance drops)
            learning_rate_fact = tf.get_variable(
                name='learning_rate_fact',
                shape=[],
                initializer=tf.constant_initializer(1.0),
                trainable=False)

            # compute the learning rate with exponential decay and scale
            # with the learning rate factor
            self.learning_rate = (tf.train.exponential_decay(
                learning_rate=float(conf['initial_learning_rate']),
                global_step=self.global_step,
                decay_steps=self.num_steps,
                decay_rate=float(conf['learning_rate_decay']))
                                  * learning_rate_fact)

            # For each task, set the task specific training ops
            for task_trainer in self.task_trainers:
                task_trainer.train(self.learning_rate)

            # Group ops over tasks
            self.process_minibatch = tf.group(
                *([task_trainer.process_minibatch
                   for task_trainer in self.task_trainers]),
                name='process_minibatch_all_tasks')

            self.reset_grad_loss_norm = tf.group(
                *([task_trainer.reset_grad_loss_norm
                   for task_trainer in self.task_trainers]),
                name='reset_grad_loss_norm_all_tasks')

            tmp = []
            for task_trainer in self.task_trainers:
                tmp += task_trainer.normalize_gradients
            self.normalize_gradients = tf.group(
                *tmp, name='normalize_gradients_all_tasks')

            # accumulate losses from tasks
            with tf.variable_scope('accumulate_losses_from_tasks'):
                self.loss_all_tasks = [
                    task_trainer.normalized_loss
                    for task_trainer in self.task_trainers]
                self.total_loss = tf.reduce_mean(
                    self.loss_all_tasks, name='acc_loss')

            tmp = []
            for task_trainer in self.task_trainers:
                tmp.append(task_trainer.apply_gradients)

            # all remaining operations with the UPDATE_OPS GraphKeys
            update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)

            # an op to increment the global step
            global_step_inc = self.global_step.assign_add(1)

            # create an operation to update the gradients, the batch_loss
            # and do all other update ops
            # self.update_op = tf.group(
            #     *(tmp + update_ops + [global_step_inc]),
            #     name='update')

            self.other_update_op = tf.group(
                *(update_ops + [global_step_inc]), name='other_update')

        if evaltype != 'None':

            # validation part
            with tf.variable_scope('validate'):

                # create a variable to save the last step where the model
                # was validated
                validated_step = tf.get_variable(
                    name='validated_step',
                    shape=[],
                    dtype=tf.int32,
                    initializer=tf.constant_initializer(
                        -int(conf['valid_frequency'])),
                    trainable=False)

                # a check if validation is due
                self.should_validate = tf.greater_equal(
                    self.global_step - validated_step,
                    int(conf['valid_frequency']))

                # For each task, if requested, set the task specific
                # validation ops. The number of validation batches is the
                # minimum number of validation batches over all tasks.
                tasks_excluded_for_val = ['None']
                if evaluatorconf.has_option(
                        'evaluator', 'tasks_excluded_for_val'):
                    tasks_excluded_for_val = evaluatorconf.get(
                        'evaluator', 'tasks_excluded_for_val').split(' ')
                self.val_task_trainers = [
                    task_trainer for task_trainer in self.task_trainers
                    if task_trainer.task_name not in tasks_excluded_for_val]
                nr_val_tasks = len(self.val_task_trainers)

                valbatches = []
                for task_trainer in self.val_task_trainers:
                    valbatches.append(task_trainer.evaluate_evaluator())
                self.valbatches = min(valbatches)

                # Group ops over tasks
                self.process_val_batch = tf.group(
                    *([task_trainer.process_val_batch
                       for task_trainer in self.val_task_trainers]))

                self.reset_val_loss_norm = tf.group(
                    *([task_trainer.reset_val_loss_norm
                       for task_trainer in self.val_task_trainers]))

                self.val_loss_all_tasks = []
                for task_trainer in self.val_task_trainers:
                    self.val_loss_all_tasks.append(
                        task_trainer.val_loss_normalized)
                self.validation_loss = tf.reduce_mean(
                    self.val_loss_all_tasks)

                # update the learning rate factor
                self.half_lr = learning_rate_fact.assign(
                    learning_rate_fact / 2).op

                # create an operation to updated the validated step
                self.update_validated_step = validated_step.assign(
                    self.global_step).op

                # variable to hold the best validation loss so far
                # NOTE(review): 1.79e+308 exceeds the float32 range --
                # confirm the intended initial value.
                self.best_validation_all_tasks = [
                    tf.get_variable(
                        name='best_validation_task_%i' % ind,
                        shape=[],
                        dtype=tf.float32,
                        initializer=tf.constant_initializer(1.79e+308),
                        trainable=False)
                    for ind in range(nr_val_tasks)]

                # op to update the best validation loss
                self.update_best_all_tasks = [
                    best_val_task.assign(self.val_loss_all_tasks[ind])
                    for ind, best_val_task in enumerate(
                        self.best_validation_all_tasks)]

                # variable to hold the previous validation loss
                self.previous_validation_all_tasks = [
                    tf.get_variable(
                        name='previous_validation_task_%i' % ind,
                        shape=[],
                        dtype=tf.float32,
                        initializer=tf.constant_initializer(1.79e+308),
                        trainable=False)
                    for ind in range(nr_val_tasks)]

                # op to update the previous validation loss
                self.update_prev_all_tasks = [
                    prev_val_task.assign(self.val_loss_all_tasks[ind])
                    for ind, prev_val_task in enumerate(
                        self.previous_validation_all_tasks)]

                # variable to hold the last x relative loss improvements.
                # x=num_tries
                self.rel_validation_all_tasks = [
                    tf.get_variable(
                        name='rel_validation_task_%i' % ind,
                        shape=[int(self.conf['num_tries'])],
                        dtype=tf.float32,
                        initializer=tf.constant_initializer(1.79e+308),
                        trainable=False)
                    for ind in range(nr_val_tasks)]

                # op to update the relative loss improvements. Iterate over
                # the validation tasks only: the lists indexed here have one
                # entry per validation task, which is fewer than the number
                # of training tasks when some are excluded from validation.
                rel_impr = [
                    (self.previous_validation_all_tasks[ind]
                     - self.val_loss_all_tasks[ind])
                    / self.previous_validation_all_tasks[ind]
                    for ind in range(nr_val_tasks)]

                # shift the history by one and append the newest improvement
                all_rel_imprs = [
                    tf.concat(
                        [rel_val_task[1:],
                         tf.expand_dims(rel_impr[ind], -1)],
                        axis=0)
                    for ind, rel_val_task in enumerate(
                        self.rel_validation_all_tasks)]
                self.update_rel_all_tasks = [
                    tf.assign(rel_val_task, all_rel_imprs[ind])
                    for ind, rel_val_task in enumerate(
                        self.rel_validation_all_tasks)]

                # a variable that holds the amount of workers at the
                # validation point
                waiting_workers = tf.get_variable(
                    name='waiting_workers',
                    shape=[],
                    dtype=tf.int32,
                    initializer=tf.constant_initializer(0),
                    trainable=False)

                # an operation to signal a waiting worker
                self.waiting = waiting_workers.assign_add(1).op

                # an operation to set the waiting workers to zero
                self.reset_waiting = waiting_workers.initializer

                # an operation to check if all workers are waiting
                self.all_waiting = tf.equal(
                    waiting_workers, num_replicas - 1)

                tf.summary.scalar('validation loss', self.validation_loss)

        else:
            self.process_val_batch = None

        tf.summary.scalar('learning rate', self.learning_rate)

        # create a histogram for all trainable parameters
        for param in tf.trainable_variables():
            tf.summary.histogram(param.name, param)

        # create the scaffold
        self.scaffold = tf.train.Scaffold()
def _parse_train_data(self, data):
    """Turns one decoded example into ShapeMask training inputs and labels.

    Args:
        data: dict providing 'image', 'groundtruth_classes',
            'groundtruth_boxes', 'groundtruth_instance_masks' and
            'groundtruth_is_crowd'.

    Returns:
        inputs: dict with 'image', 'mask_boxes', 'mask_outer_boxes' and
            'mask_classes'.
        labels: dict with 'cls_targets', 'box_targets', 'anchor_boxes',
            'num_positives', 'image_info', 'mask_targets',
            'fine_mask_targets' and 'mask_is_valid'.
    """
    classes = data['groundtruth_classes']
    boxes = data['groundtruth_boxes']
    masks = data['groundtruth_instance_masks']
    crowd_flags = data['groundtruth_is_crowd']

    # Drop annotations flagged as crowd (training only, when enabled).
    if self._skip_crowd_during_training and self._is_training:
        groundtruth_count = tf.shape(classes)[0]
        with tf.control_dependencies([groundtruth_count, crowd_flags]):
            # With no crowd flags at all, keep every annotation.
            keep = tf.cond(
                tf.greater(tf.size(crowd_flags), 0),
                lambda: tf.where(tf.logical_not(crowd_flags))[:, 0],
                lambda: tf.cast(tf.range(groundtruth_count), tf.int64))
        classes = tf.gather(classes, keep)
        boxes = tf.gather(boxes, keep)
        masks = tf.gather(masks, keep)

    # Original image and its (height, width).
    image = data['image']
    original_hw = tf.shape(image)[0:2]

    # Without categories, every positive class id collapses to 1.
    if not self._use_category:
        classes = tf.cast(tf.greater(classes, 0), dtype=tf.float32)

    # Mean/std normalization.
    image = input_utils.normalize_image(image)

    # Optional random horizontal flip.
    if self._aug_rand_hflip:
        image, boxes, masks = input_utils.random_horizontal_flip(
            image, boxes, masks)

    # Boxes: normalized coordinates -> pixel coordinates.
    boxes = box_utils.denormalize_boxes(boxes, original_hw)

    # Resize and crop the image.
    image, image_info = input_utils.resize_and_crop_image(
        image,
        self._output_size,
        self._output_size,
        aug_scale_min=self._aug_scale_min,
        aug_scale_max=self._aug_scale_max)
    image_scale = image_info[2, :]
    offset = image_info[3, :]

    # Apply the same resize/crop to the boxes.
    boxes = input_utils.resize_and_crop_boxes(
        boxes, image_scale, image_info[1, :], offset)

    # Remove ground truth whose box is all zeros.
    non_empty = box_utils.get_non_empty_box_indices(boxes)
    boxes = tf.gather(boxes, non_empty)
    classes = tf.gather(classes, non_empty)
    masks = tf.gather(masks, non_empty)

    # Anchor assignment.
    input_anchor = anchor.Anchor(
        self._min_level, self._max_level, self._num_scales,
        self._aspect_ratios, self._anchor_size, self._output_size)
    anchor_labeler = anchor.AnchorLabeler(
        input_anchor, self._match_threshold, self._unmatched_threshold)
    cls_targets, box_targets, num_positives = anchor_labeler.label_anchors(
        boxes, tf.cast(tf.expand_dims(classes, axis=1), tf.float32))

    # Sampling of ground truth for the mask branch.
    num_masks = tf.shape(masks)[0]
    mask_shape = tf.shape(masks)[1:3]
    sample_count = self._num_sampled_masks

    # Pad to a constant number of entries so sampling always has enough.
    padded_boxes = pad_to_size(boxes, sample_count)
    padded_classes = pad_to_size(classes, sample_count)
    padded_masks = pad_to_size(masks, sample_count)

    # Random sample of mask indices; for an image without masks the modulo
    # maps every index to 0, i.e. onto the dummy padded entries.
    rand_indices = tf.random.shuffle(
        tf.range(tf.maximum(num_masks, sample_count)))
    rand_indices = tf.math.mod(rand_indices, tf.maximum(num_masks, 1))
    rand_indices = tf.reshape(rand_indices[0:sample_count], [sample_count])

    sampled_boxes = tf.gather(padded_boxes, rand_indices)
    sampled_classes = tf.gather(padded_classes, rand_indices)
    sampled_masks = tf.gather(padded_masks, rand_indices)

    # Jitter the sampled boxes to mimic noisy detections.
    sampled_boxes = box_utils.jitter_boxes(
        sampled_boxes, noise_scale=self._box_jitter_scale)
    sampled_boxes = box_utils.clip_boxes(sampled_boxes, self._output_size)

    # A feature crop (outer box) fully contains its sampled box.
    mask_outer_boxes = box_utils.compute_outer_boxes(
        sampled_boxes, tf.shape(image)[0:2], scale=self._outer_box_scale)
    mask_outer_boxes = box_utils.clip_boxes(
        mask_outer_boxes, self._output_size)

    # Undo offset and scale to express the outer boxes in the coordinates
    # of the original image, then normalize by the mask size.
    mask_outer_boxes_ori = (
        (mask_outer_boxes + tf.tile(tf.expand_dims(offset, axis=0), [1, 2]))
        / tf.tile(tf.expand_dims(image_scale, axis=0), [1, 2]))
    norm_mask_outer_boxes_ori = box_utils.normalize_boxes(
        mask_outer_boxes_ori, mask_shape)

    # Give sampled_masks a trailing channel axis for crop_and_resize.
    sampled_masks = tf.cast(
        tf.expand_dims(sampled_masks, axis=-1), tf.float32)

    def _binary_mask_crops(side):
        """Crops each sampled mask to side x side and thresholds at 0.5."""
        crops = tf.image.crop_and_resize(
            sampled_masks,
            norm_mask_outer_boxes_ori,
            box_indices=tf.range(self._num_sampled_masks),
            crop_size=[side, side],
            method='bilinear',
            extrapolation_value=0,
            name='train_mask_targets')
        crops = tf.where(
            tf.greater_equal(crops, 0.5),
            tf.ones_like(crops),
            tf.zeros_like(crops))
        return tf.squeeze(crops, axis=-1)

    mask_targets = _binary_mask_crops(self._mask_crop_size)
    if self._up_sample_factor > 1:
        fine_mask_targets = _binary_mask_crops(
            self._mask_crop_size * self._up_sample_factor)
    else:
        fine_mask_targets = mask_targets

    # Optional bfloat16 cast of the input image.
    if self._use_bfloat16:
        image = tf.cast(image, dtype=tf.bfloat16)

    # An image without any mask contributes no valid mask samples.
    valid_image = tf.cast(tf.not_equal(num_masks, 0), tf.int32)
    if self._mask_train_class == 'all':
        mask_is_valid = valid_image * tf.ones_like(sampled_classes, tf.int32)
    else:
        # Only sampled classes that belong to the training split are valid.
        mask_valid_classes = tf.cast(
            tf.expand_dims(
                class_utils.coco_split_class_ids(self._mask_train_class), 1),
            sampled_classes.dtype)
        match = tf.reduce_any(
            tf.equal(tf.expand_dims(sampled_classes, 0), mask_valid_classes),
            0)
        mask_is_valid = valid_image * tf.cast(match, tf.int32)

    # Pack labels for model_fn outputs.
    labels = {
        'cls_targets': cls_targets,
        'box_targets': box_targets,
        'anchor_boxes': input_anchor.multilevel_boxes,
        'num_positives': num_positives,
        'image_info': image_info,
        # For ShapeMask.
        'mask_targets': mask_targets,
        'fine_mask_targets': fine_mask_targets,
        'mask_is_valid': mask_is_valid,
    }

    inputs = {
        'image': image,
        'mask_boxes': sampled_boxes,
        'mask_outer_boxes': mask_outer_boxes,
        'mask_classes': sampled_classes,
    }
    return inputs, labels
def distortion_homography_adaptation(image, net, config):
    """Performs radial distortion and homography adaptation.

    The detector `net` is run on the input image and on `config['num'] - 1`
    randomly warped and radially distorted copies of it. Each prediction is
    mapped back to the input frame and the predictions are aggregated.

    Arguments:
        image: a 'Tensor' with shape '[N,H,W,1]'.
        net: A function that takes an image as input, performs inference, and
            outputs the prediction dictionary (its 'prob' entry is used).
        config: A configuration dictionary merged over
            `homography_adaptation_default_config`. Keys read here are
            'homographies', 'valid_border_margin', 'num', 'aggregation' and
            'filter_counts'.
    Returns:
        A dictionary with the aggregated detection probabilities ('prob'),
        the per-pixel 'counts', the 'mean_prob', the stacked 'input_images'
        and the individual back-projected probabilities ('H_probs').
    Raises:
        ValueError: if config['aggregation'] is neither 'max' nor 'sum'.
    """

    probs = net(image)['prob']
    counts = tf.ones_like(probs)
    images = image

    # A trailing axis collects one slice per sampled transformation.
    probs = tf.expand_dims(probs, axis=-1)
    counts = tf.expand_dims(counts, axis=-1)
    images = tf.expand_dims(images, axis=-1)

    shape = tf.shape(image)[1:3]
    config = dict_update(homography_adaptation_default_config, config)

    def step(i, probs, counts, images):
        # Sample image patch
        H = sample_homography(shape, **config['homographies'])
        H_inv = invert_homography(H)

        # Random distortion centre inside the image.
        H_ = shape[0]
        W = shape[1]
        row_c = tf.random_uniform(shape=[], minval=0,
                                  maxval=tf.cast(H_, tf.float32),
                                  dtype=tf.float32)
        col_c = tf.random_uniform(shape=[], minval=0,
                                  maxval=tf.cast(W, tf.float32),
                                  dtype=tf.float32)
        # NOTE(review): the distortion factor is hard-coded here; confirm
        # whether a value from `config` was meant to be used instead.
        lambda_ = tf.constant(0.000006)

        # apply the homography
        warped = H_transform(image, H, interpolation='BILINEAR')

        # apply the radial distortion
        warped = distort(warped, lambda_, (row_c, col_c))

        # `count` is an all-ones image pushed through the inverse chain
        # (undistort, then inverse homography); `mask` is an all-ones image
        # pushed through the forward chain (homography, then distort).
        count = undistort(tf.expand_dims(tf.ones(tf.shape(image)[:3]), -1),
                          lambda_, (row_c, col_c))
        count = H_transform(count, H_inv, interpolation='NEAREST')

        mask = H_transform(tf.expand_dims(tf.ones(tf.shape(image)[:3]), -1),
                           H, interpolation='NEAREST')
        mask = distort(mask, lambda_, (row_c, col_c))

        # Ignore the detections too close to the border to avoid artifacts
        if config['valid_border_margin']:
            kernel = cv.getStructuringElement(
                cv.MORPH_ELLIPSE, (config['valid_border_margin'] * 2, ) * 2)
            with tf.device('/cpu:0'):
                count = tf.nn.erosion2d(
                    count,
                    tf.to_float(tf.constant(kernel)[..., tf.newaxis]),
                    [1, 1, 1, 1], [1, 1, 1, 1], 'SAME')[..., 0] + 1.
                mask = tf.nn.erosion2d(
                    mask,
                    tf.to_float(tf.constant(kernel)[..., tf.newaxis]),
                    [1, 1, 1, 1], [1, 1, 1, 1], 'SAME')[..., 0] + 1.
        else:
            # Without erosion the trailing channel axis still has to be
            # dropped so that count/mask line up with `prob` below.
            count = count[..., 0]
            mask = mask[..., 0]

        # Predict detection probabilities
        prob = net(warped)['prob']
        prob = prob * mask
        # Map the prediction back to the frame of the input image.
        prob_proj = undistort(tf.expand_dims(prob, -1), lambda_,
                              (row_c, col_c))
        prob_proj = H_transform(prob_proj, H_inv,
                                interpolation='BILINEAR')[..., 0]
        prob_proj = prob_proj * count

        probs = tf.concat([probs, tf.expand_dims(prob_proj, -1)], axis=-1)
        counts = tf.concat([counts, tf.expand_dims(count, -1)], axis=-1)
        images = tf.concat([images, tf.expand_dims(warped, -1)], axis=-1)
        return i + 1, probs, counts, images

    # The last axis of the accumulators grows by one per iteration, hence
    # the relaxed shape invariants.
    _, probs, counts, images = tf.while_loop(
        lambda i, p, c, im: tf.less(i, config['num'] - 1),
        step,
        [0, probs, counts, images],
        parallel_iterations=1,
        back_prop=False,
        shape_invariants=[
            tf.TensorShape([]),
            tf.TensorShape([None, None, None, None]),
            tf.TensorShape([None, None, None, None]),
            tf.TensorShape([None, None, None, 1, None])
        ])

    counts = tf.reduce_sum(counts, axis=-1)
    max_prob = tf.reduce_max(probs, axis=-1)
    mean_prob = tf.reduce_sum(probs, axis=-1) / counts

    # Note that the 'sum' mode returns the count-normalized sum (the mean).
    if config['aggregation'] == 'max':
        prob = max_prob
    elif config['aggregation'] == 'sum':
        prob = mean_prob
    else:
        raise ValueError('Unkown aggregation method: {}'.format(
            config['aggregation']))

    # Zero out pixels that were covered by too few transformations.
    if config['filter_counts']:
        prob = tf.where(tf.greater_equal(counts, config['filter_counts']),
                        prob, tf.zeros_like(prob))

    return {
        'prob': prob,
        'counts': counts,
        'mean_prob': mean_prob,
        'input_images': images,
        'H_probs': probs
    }  # debug
def postprocess_fastrcnn(self, rois, bbox_ppred, scores, img_shape):
    '''
    Decodes, clips and NMS-filters the per-class box predictions.

    :param rois:[-1, 4]
    :param bbox_ppred: [-1, (cfgs.Class_num+1) * 4]
    :param scores: [-1, cfgs.Class_num + 1]
    :param img_shape: passed to the clipping helper as the image boundaries
    :return: (final_boxes, final_scores, final_category), concatenated over
        classes 1..cfgs.CLASS_NUM
    '''

    with tf.name_scope('postprocess_fastrcnn'):
        # Post-processing must not feed gradients back into the network.
        rois = tf.stop_gradient(rois)
        scores = tf.stop_gradient(scores)
        bbox_ppred = tf.stop_gradient(
            tf.reshape(bbox_ppred, [-1, cfgs.CLASS_NUM + 1, 4]))

        per_class_deltas = tf.unstack(bbox_ppred, axis=1)
        per_class_scores = tf.unstack(scores, axis=1)

        boxes_acc, scores_acc, labels_acc = [], [], []
        # Index 0 is skipped; only classes 1..CLASS_NUM are processed.
        for cls_id in range(1, cfgs.CLASS_NUM+1):
            cls_scores = per_class_scores[cls_id]

            # 1. decode boxes in each class
            cls_boxes = encode_and_decode.decode_boxes(
                encoded_boxes=per_class_deltas[cls_id],
                reference_boxes=rois,
                scale_factors=cfgs.ROI_SCALE_FACTORS)

            # 2. clip to img boundaries
            cls_boxes = boxes_utils.clip_boxes_to_img_boundaries(
                decode_boxes=cls_boxes, img_shape=img_shape)

            # 3. NMS
            selected = tf.image.non_max_suppression(
                boxes=cls_boxes,
                scores=cls_scores,
                max_output_size=cfgs.FAST_RCNN_NMS_MAX_BOXES_PER_CLASS,
                iou_threshold=cfgs.FAST_RCNN_NMS_IOU_THRESHOLD)

            kept_boxes = tf.gather(cls_boxes, selected)
            kept_scores = tf.gather(cls_scores, selected)

            boxes_acc.append(kept_boxes)
            scores_acc.append(kept_scores)
            # Category label with the same shape/dtype as the kept scores.
            labels_acc.append(tf.ones_like(kept_scores) * cls_id)

        final_boxes = tf.concat(boxes_acc, axis=0)
        final_scores = tf.concat(scores_acc, axis=0)
        final_category = tf.concat(labels_acc, axis=0)

        if self.is_training:
            # In training the detections are shown in tensorboard, so only
            # those reaching the display score threshold are kept.
            kept_indices = tf.reshape(
                tf.where(tf.greater_equal(final_scores,
                                          cfgs.SHOW_SCORE_THRSHOLD)),
                [-1])
            final_boxes, final_scores, final_category = [
                tf.gather(values, kept_indices)
                for values in (final_boxes, final_scores, final_category)]

    return final_boxes, final_scores, final_category
def __init__(self, conf, tasksconf, dataconf, modelconf, evaluatorconf,
             expdir, init_filename, server, task_index):
    '''
    NnetTrainer constructor, creates the training graph

    Args:
        conf: the trainer config
        tasksconf: the config file for each task
        dataconf: the data configuration as a ConfigParser
        modelconf: the neural net model configuration
        evaluatorconf: the evaluator configuration for evaluating
            if None no evaluation will be done
        expdir: directory where the summaries will be written
        init_filename: filename of the network that should be used to
            initialize the model. Put to None if no network is
            available/wanted.
        server: optional server to be used for distributed training
        task_index: optional index of the worker task in the cluster
    '''

    self.expdir = expdir
    self.server = server
    self.conf = conf
    self.tasksconf = tasksconf
    self.task_index = task_index
    self.init_filename = init_filename

    self.batch_size = int(conf['batch_size'])

    cluster = tf.train.ClusterSpec(server.server_def.cluster)

    #create the graph
    self.graph = tf.Graph()

    if 'local' in cluster.as_dict():
        num_replicas = 1
        device = tf.DeviceSpec(job='local')
    else:
        #distributed training
        num_replicas = len(cluster.as_dict()['worker'])
        num_servers = len(cluster.as_dict()['ps'])
        ps_strategy = tf.contrib.training.GreedyLoadBalancingStrategy(
            num_tasks=num_servers,
            load_fn=tf.contrib.training.byte_size_load_fn)
        device = tf.train.replica_device_setter(
            ps_tasks=num_servers, ps_strategy=ps_strategy)
        chief_ps = tf.DeviceSpec(job='ps', task=0)

    self.is_chief = task_index == 0

    #create the model and store it (pickled) in the experiment directory
    modelfile = os.path.join(expdir, 'model', 'model.pkl')
    with open(modelfile, 'wb') as fid:
        self.model = model_factory.factory(
            modelconf.get('model', 'architecture'))(conf=modelconf)
        pickle.dump(self.model, fid)

    evaltype = evaluatorconf.get('evaluator', 'evaluator')

    #define the placeholders in the graph
    with self.graph.as_default():

        #create a local num_steps variable
        self.num_steps = tf.get_variable(
            name='num_steps',
            shape=[],
            dtype=tf.int32,
            initializer=tf.constant_initializer(0),
            trainable=False)

        #a variable to hold the amount of steps already taken
        self.global_step = tf.get_variable(
            name='global_step',
            shape=[],
            dtype=tf.int32,
            initializer=tf.constant_initializer(0),
            trainable=False)

        should_terminate = tf.get_variable(
            name='should_terminate',
            shape=[],
            dtype=tf.bool,
            initializer=tf.constant_initializer(False),
            trainable=False)

        self.terminate = should_terminate.assign(True).op

        #create a check if training should continue
        self.should_stop = tf.logical_or(
            tf.greater_equal(self.global_step, self.num_steps),
            should_terminate)

        #the scopes are captured here so they can be re-entered per task
        with tf.variable_scope('train') as train_scope:
            tasks_losses = []

        if evaltype != 'None':
            with tf.variable_scope('validate') as val_scope:
                tasks_val_losses = []

    #3 model types for multi task: single one to one; single one to many;
    #multiple one to one
    #single one to one: the whole model is shared for all tasks, only loss
    #function can be different
    #single one to many: each task has a separate output so only part of
    #the network is shared, eg evrything but the output layer
    #multiple one to one: each task has its own network. Possibly the
    #outputs are combined in a loss function

    for task in self.conf['tasks'].split(' '):
        taskconf = self.tasksconf[task]

        #get the database configurations
        input_names = modelconf.get('io', 'inputs').split(' ')
        if input_names == ['']:
            input_names = []
        input_sections = [taskconf[i].split(' ') for i in input_names]
        input_dataconfs = []
        for sectionset in input_sections:
            input_dataconfs.append([])
            for section in sectionset:
                input_dataconfs[-1].append(dict(dataconf.items(section)))

        output_names = taskconf['targets'].split(' ')
        if output_names == ['']:
            output_names = []
        target_sections = [taskconf[o].split(' ') for o in output_names]
        target_dataconfs = []
        for sectionset in target_sections:
            target_dataconfs.append([])
            for section in sectionset:
                target_dataconfs[-1].append(dict(dataconf.items(section)))

        #create the loss computer
        loss_computer = loss_computer_factory.factory(
            taskconf['loss_type'])(self.batch_size)

        #create the evaluator
        if evaltype != 'None':
            evaluator = evaluator_factory.factory(evaltype)(
                conf=evaluatorconf,
                dataconf=dataconf,
                model=self.model,
                task=task)

        with self.graph.as_default():

            #check if running in distributed model
            if 'local' in cluster.as_dict():

                #get the filenames
                data_queue_elements, _ = input_pipeline.get_filenames(
                    input_dataconfs + target_dataconfs)

                #create the data queue and queue runners (inputs get
                #shuffled! I already did this so set to False)
                data_queue = tf.train.string_input_producer(
                    string_tensor=data_queue_elements,
                    shuffle=False,
                    seed=None,
                    capacity=self.batch_size * 2,
                    shared_name='data_queue')

                #compute the number of steps. Floor division keeps the
                #result an integer (also under Python 3), as required by
                #the int32 num_steps variable it is assigned to.
                if int(conf['numbatches_to_aggregate']) == 0:
                    num_steps = (int(conf['num_epochs'])
                                 * len(data_queue_elements)
                                 // self.batch_size)
                else:
                    num_steps = (int(conf['num_epochs'])
                                 * len(data_queue_elements)
                                 // (self.batch_size
                                     * int(conf['numbatches_to_aggregate'])))

                #set the number of steps
                self.set_num_steps = self.num_steps.assign(num_steps).op
                self.done = tf.no_op()

            else:
                with tf.device(chief_ps):

                    #get the data queue
                    data_queue = tf.FIFOQueue(
                        capacity=self.batch_size * (num_replicas + 1),
                        shared_name='data_queue',
                        name='data_queue',
                        dtypes=[tf.string],
                        shapes=[[]])

                    #get the number of steps from the parameter server
                    num_steps_queue = tf.FIFOQueue(
                        capacity=num_replicas,
                        dtypes=[tf.int32],
                        shared_name='num_steps_queue',
                        name='num_steps_queue',
                        shapes=[[]])

                    #set the number of steps
                    self.set_num_steps = self.num_steps.assign(
                        num_steps_queue.dequeue()).op

                #get the done queues
                done_ops = []
                for i in range(num_servers):
                    with tf.device('job:ps/task:%d' % i):
                        done_queue = tf.FIFOQueue(
                            capacity=num_replicas,
                            dtypes=[tf.bool],
                            shapes=[[]],
                            shared_name='done_queue%d' % i,
                            name='done_queue%d' % i)

                        done_ops.append(done_queue.enqueue(True))

                self.done = tf.group(*done_ops)

            #training part
            with tf.variable_scope(train_scope):
                with tf.variable_scope(task):

                    #create the input pipeline
                    data, seq_length = input_pipeline.input_pipeline(
                        data_queue=data_queue,
                        batch_size=self.batch_size,
                        numbuckets=int(conf['numbuckets']),
                        dataconfs=input_dataconfs + target_dataconfs)

                    #the pipeline returns the inputs first, then the targets
                    inputs = {
                        input_names[i]: d
                        for i, d in enumerate(data[:len(input_sections)])}
                    seq_length = {
                        input_names[i]: d
                        for i, d in enumerate(
                            seq_length[:len(input_sections)])}
                    targets = {
                        output_names[i]: d
                        for i, d in enumerate(data[len(input_sections):])}
                    #target_seq_length = {
                        #output_names[i]: d
                        #for i, d in enumerate(
                            #seq_length[len(input_sections):])}

                    #compute the training outputs of the model
                    logits = self.model(
                        inputs=inputs,
                        input_seq_length=seq_length,
                        is_training=True)

                    #compute the loss
                    task_loss = loss_computer(targets, logits, seq_length)

                    tasks_losses.append(task_loss)

            #validation part
            if evaltype != 'None':
                with tf.variable_scope(val_scope):
                    with tf.variable_scope(task):
                        task_val_batch_loss, self.valbatches, _, _ = \
                            evaluator.evaluate()
                        tasks_val_losses.append(task_val_batch_loss)

    with self.graph.as_default():
        with tf.variable_scope(train_scope):

            #a variable to scale the learning rate (used to reduce the
            #learning rate in case validation performance drops)
            learning_rate_fact = tf.get_variable(
                name='learning_rate_fact',
                shape=[],
                initializer=tf.constant_initializer(1.0),
                trainable=False)

            #compute the learning rate with exponential decay and scale
            #with the learning rate factor
            self.learning_rate = (tf.train.exponential_decay(
                learning_rate=float(conf['initial_learning_rate']),
                global_step=self.global_step,
                decay_steps=self.num_steps,
                decay_rate=float(conf['learning_rate_decay']))
                                  * learning_rate_fact)

            #create the optimizer
            optimizer = tf.train.AdamOptimizer(self.learning_rate)

            #TODO: The proper way to exploit data paralellism is via the
            #SyncReplicasOptimizer defined below. However for some reason
            #it hangs and I have not yet found a solution for it. For the
            #moment the gradients are accumulated in a way that does not
            #allow data paralellism and there is no advantage on having
            #multiple workers. (We also accumulate the loss)

            #create an optimizer that aggregates gradients
            #if int(conf['numbatches_to_aggregate']) > 0:
                #optimizer = tf.train.SyncReplicasOptimizer(
                    #opt=optimizer,
                    #replicas_to_aggregate=int(
                        #conf['numbatches_to_aggregate'])#,
                    ##total_num_replicas=num_replicas
                    #)

            loss = tf.reduce_mean(tasks_losses)

            #accumulator for the loss over minibatches
            self.total_loss = tf.get_variable(
                name='total_loss',
                shape=[],
                dtype=tf.float32,
                initializer=tf.constant_initializer(0),
                trainable=False)

            self.reset_loss = self.total_loss.assign(0.0)
            self.acc_loss = self.total_loss.assign_add(loss)

            ##compute the gradients
            #grads_and_vars = optimizer.compute_gradients(self.loss)

            #with tf.variable_scope('clip'):
                #clip_value = float(conf['clip_grad_value'])
                ##clip the gradients
                #grads_and_vars = [
                    #(tf.clip_by_value(grad, -clip_value, clip_value), var)
                    #for grad, var in grads_and_vars]

            #one non-trainable accumulator per trainable parameter
            self.params = tf.trainable_variables()

            grads = [
                tf.get_variable(
                    param.op.name,
                    param.get_shape().as_list(),
                    initializer=tf.constant_initializer(0),
                    trainable=False)
                for param in self.params]

            self.reset_grad = tf.variables_initializer(grads)

            #compute the gradients
            minibatch_grads_and_vars = optimizer.compute_gradients(loss)

            with tf.variable_scope('clip'):
                clip_value = float(conf['clip_grad_value'])
                #clip the gradients
                minibatch_grads_and_vars = [
                    (tf.clip_by_value(grad, -clip_value, clip_value), var)
                    for grad, var in minibatch_grads_and_vars]

            (minibatchgrads, minibatchvars) = zip(*minibatch_grads_and_vars)

            #update gradients by accumulating them
            #NOTE(review): this pairs compute_gradients output with
            #tf.trainable_variables() by position -- confirm both use the
            #same ordering.
            self.update_gradients = [
                grad.assign_add(batchgrad)
                for batchgrad, grad in zip(minibatchgrads, grads)]

            #opperation to apply the gradients
            grads_and_vars = list(zip(grads, minibatchvars))
            apply_gradients_op = optimizer.apply_gradients(
                grads_and_vars=grads_and_vars,
                global_step=self.global_step,
                name='apply_gradients')

            #all remaining operations with the UPDATE_OPS GraphKeys
            update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)

            #create an operation to update the gradients, the batch_loss
            #and do all other update ops
            self.update_op = tf.group(
                *([apply_gradients_op] + update_ops),
                name='update')

    with self.graph.as_default():
        if evaltype != 'None':

            #validation part
            with tf.variable_scope(val_scope):

                #create a variable to hold the validation loss
                self.validation_loss = tf.get_variable(
                    name='validation_loss',
                    shape=[],
                    dtype=tf.float32,
                    initializer=tf.constant_initializer(0),
                    trainable=False)

                #create a variable to save the last step where the model
                #was validated
                validated_step = tf.get_variable(
                    name='validated_step',
                    shape=[],
                    dtype=tf.int32,
                    initializer=tf.constant_initializer(
                        -int(conf['valid_frequency'])),
                    trainable=False)

                #a check if validation is due
                self.should_validate = tf.greater_equal(
                    self.global_step - validated_step,
                    int(conf['valid_frequency']))

                #compute the loss
                val_batch_loss = tf.reduce_mean(tasks_val_losses)

                self.update_loss = self.validation_loss.assign(
                    self.validation_loss
                    + val_batch_loss  #/self.valbatches
                ).op

                #update the learning rate factor
                self.half_lr = learning_rate_fact.assign(
                    learning_rate_fact / 2).op

                #create an operation to updated the validated step
                self.update_validated_step = validated_step.assign(
                    self.global_step).op

                #variable to hold the best validation loss so far
                #NOTE(review): 1.79e+308 exceeds the float32 range --
                #confirm the intended initial value.
                self.best_validation = tf.get_variable(
                    name='best_validation',
                    shape=[],
                    dtype=tf.float32,
                    initializer=tf.constant_initializer(1.79e+308),
                    trainable=False)

                #op to update the best velidation loss
                self.update_best = self.best_validation.assign(
                    self.validation_loss).op

                #a variable that holds the amount of workers at the
                #validation point
                waiting_workers = tf.get_variable(
                    name='waiting_workers',
                    shape=[],
                    dtype=tf.int32,
                    initializer=tf.constant_initializer(0),
                    trainable=False)

                #an operation to signal a waiting worker
                self.waiting = waiting_workers.assign_add(1).op

                #an operation to set the waiting workers to zero
                self.reset_waiting = waiting_workers.initializer

                #an operation to check if all workers are waiting
                self.all_waiting = tf.equal(
                    waiting_workers, num_replicas - 1)

                tf.summary.scalar('validation loss', self.validation_loss)

        else:
            self.update_loss = None

        tf.summary.scalar('learning rate', self.learning_rate)

        #create a histogram for all trainable parameters
        for param in tf.trainable_variables():
            tf.summary.histogram(param.name, param)

        #create the scaffold
        self.scaffold = tf.train.Scaffold()
def _random_crop(image_list, crop_height, crop_width):
    """Crop every image in `image_list` with one shared random window.

    The same randomly placed `crop_height` x `crop_width` window is applied to
    each image, which keeps aligned inputs registered with each other, e.g.:

        image, depths, normals = _random_crop([image, depths, normals], 120, 150)

    Args:
        image_list: a non-empty list of rank-3 image tensors sharing the same
            height and width (the last dimension may differ).
        crop_height: the height of the crop window.
        crop_width: the width of the crop window.

    Returns:
        A list with one cropped tensor per input image, in input order.

    Raises:
        ValueError: if `image_list` is empty.

    Rank, size-agreement and crop-size violations are not raised here; they
    are checked by `tf.Assert` ops that the crop offsets depend on.
    """
    if not image_list:
        raise ValueError('Empty image_list.')

    # One rank-3 assertion per input tensor.
    rank_assertions = []
    for image in image_list:
        image_rank = tf.rank(image)
        rank_assertions.append(
            tf.Assert(
                tf.equal(image_rank, 3),
                ['Wrong rank for tensor %s [expected] [actual]',
                 image.name, 3, image_rank]))

    # The first image defines the reference height and width.
    with tf.control_dependencies([rank_assertions[0]]):
        image_shape = tf.shape(image_list[0])
    image_height = image_shape[0]
    image_width = image_shape[1]
    crop_size_assert = tf.Assert(
        tf.logical_and(
            tf.greater_equal(image_height, crop_height),
            tf.greater_equal(image_width, crop_width)),
        ['Crop size greater than the image size.'])

    asserts = [rank_assertions[0], crop_size_assert]

    # Every remaining image must match the reference height and width.
    for image, rank_assertion in zip(image_list[1:], rank_assertions[1:]):
        asserts.append(rank_assertion)
        with tf.control_dependencies([rank_assertion]):
            shape = tf.shape(image)
        height = shape[0]
        width = shape[1]
        # The payload order follows the "[expected][actual]" label: the
        # reference dimension first, then this image's own dimension.
        height_assert = tf.Assert(
            tf.equal(height, image_height),
            ['Wrong height for tensor %s [expected][actual]',
             image.name, image_height, height])
        width_assert = tf.Assert(
            tf.equal(width, image_width),
            ['Wrong width for tensor %s [expected][actual]',
             image.name, image_width, width])
        asserts.extend([height_assert, width_assert])

    # Create a random bounding box.
    #
    # Use tf.random_uniform and not numpy.random.rand: the former draws the
    # offsets at graph evaluation time, the latter would fix them at graph
    # definition time.
    with tf.control_dependencies(asserts):
        max_offset_height = tf.reshape(image_height - crop_height + 1, [])
        max_offset_width = tf.reshape(image_width - crop_width + 1, [])
    offset_height = tf.random_uniform(
        [], maxval=max_offset_height, dtype=tf.int32)
    offset_width = tf.random_uniform(
        [], maxval=max_offset_width, dtype=tf.int32)

    return [
        _crop(image, offset_height, offset_width, crop_height, crop_width)
        for image in image_list
    ]
def build_whole_detection_network(self, input_img_batch, gtboxes_batch):
    """Build the full detection graph: backbone, RPN, sampling and Fast-RCNN head.

    Args:
        input_img_batch: image tensor fed to `self.build_base_network`
            (presumably [1, H, W, C] -- confirm against the caller).
        gtboxes_batch: ground-truth boxes; only used when `self.is_training`
            is true, where it is reshaped to [-1, 5] and cast to float32.

    Returns:
        When `self.is_training` is false: the result of
        `self.postprocess_fastrcnn`.
        When `self.is_training` is true: the tuple
        `(final_bbox, final_scores, final_category, loss_dict)`.
    """
    if self.is_training:
        # ensure shape is [M, 5]
        gtboxes_batch = tf.reshape(gtboxes_batch, [-1, 5])
        gtboxes_batch = tf.cast(gtboxes_batch, tf.float32)

    img_shape = tf.shape(input_img_batch)

    # 1. build base network
    feature_to_cropped = self.build_base_network(input_img_batch)

    # 2. build rpn
    with tf.variable_scope('build_rpn',
                           regularizer=slim.l2_regularizer(cfgs.WEIGHT_DECAY)):
        rpn_conv3x3 = slim.conv2d(
            feature_to_cropped, 512, [3, 3],
            trainable=self.is_training, weights_initializer=cfgs.INITIALIZER,
            activation_fn=tf.nn.relu,
            scope='rpn_conv/3x3')
        # Two class logits per anchor.
        rpn_cls_score = slim.conv2d(rpn_conv3x3, self.num_anchors_per_location*2, [1, 1], stride=1,
                                    trainable=self.is_training, weights_initializer=cfgs.INITIALIZER,
                                    activation_fn=None,
                                    scope='rpn_cls_score')
        # Four box-regression values per anchor.
        rpn_box_pred = slim.conv2d(rpn_conv3x3, self.num_anchors_per_location*4, [1, 1], stride=1,
                                   trainable=self.is_training, weights_initializer=cfgs.BBOX_INITIALIZER,
                                   activation_fn=None,
                                   scope='rpn_bbox_pred')
        # Flatten to one row per anchor.
        rpn_box_pred = tf.reshape(rpn_box_pred, [-1, 4])
        rpn_cls_score = tf.reshape(rpn_cls_score, [-1, 2])
        rpn_cls_prob = slim.softmax(rpn_cls_score, scope='rpn_cls_prob')

    # 3. generate_anchors
    featuremap_height, featuremap_width = tf.shape(feature_to_cropped)[1], tf.shape(feature_to_cropped)[2]
    featuremap_height = tf.cast(featuremap_height, tf.float32)
    featuremap_width = tf.cast(featuremap_width, tf.float32)

    anchors = anchor_utils.make_anchors(base_anchor_size=cfgs.BASE_ANCHOR_SIZE_LIST[0],
                                        anchor_scales=cfgs.ANCHOR_SCALES, anchor_ratios=cfgs.ANCHOR_RATIOS,
                                        featuremap_height=featuremap_height,
                                        featuremap_width=featuremap_width,
                                        stride=cfgs.ANCHOR_STRIDE,
                                        name="make_anchors_forRPN")

    # 4. postprocess rpn proposals. such as: decode, clip, NMS
    with tf.variable_scope('postprocess_RPN'):
        rois, roi_scores = postprocess_rpn_proposals(rpn_bbox_pred=rpn_box_pred,
                                                     rpn_cls_prob=rpn_cls_prob,
                                                     img_shape=img_shape,
                                                     anchors=anchors,
                                                     is_training=self.is_training)
        # rois shape [-1, 4]
        # Image summaries of the RPN proposals (training only).
        if self.is_training:
            rois_in_img = show_box_in_tensor.draw_boxes_with_scores(img_batch=input_img_batch,
                                                                    boxes=rois,
                                                                    scores=roi_scores)
            tf.summary.image('all_rpn_rois', rois_in_img)

            # Indices of proposals whose score is at least 0.5.
            score_gre_05 = tf.reshape(tf.where(tf.greater_equal(roi_scores, 0.5)), [-1])
            score_gre_05_rois = tf.gather(rois, score_gre_05)
            score_gre_05_score = tf.gather(roi_scores, score_gre_05)
            score_gre_05_in_img = show_box_in_tensor.draw_boxes_with_scores(img_batch=input_img_batch,
                                                                            boxes=score_gre_05_rois,
                                                                            scores=score_gre_05_score)
            tf.summary.image('score_greater_05_rois', score_gre_05_in_img)

    if self.is_training:
        with tf.variable_scope('sample_anchors_minibatch'):
            # Anchor labels and regression targets are computed by a Python
            # function wrapped with tf.py_func.
            rpn_labels, rpn_bbox_targets = \
                tf.py_func(
                    anchor_target_layer,
                    [gtboxes_batch, img_shape, anchors],
                    [tf.float32, tf.float32])
            rpn_bbox_targets = tf.reshape(rpn_bbox_targets, [-1, 4])
            rpn_labels = tf.to_int32(rpn_labels, name="to_int32")
            rpn_labels = tf.reshape(rpn_labels, [-1])
            self.add_anchor_img_smry(input_img_batch, anchors, rpn_labels)

        # RPN accuracy summary, computed over anchors whose label is not -1.
        rpn_cls_category = tf.argmax(rpn_cls_prob, axis=1)
        kept_rpppn = tf.reshape(tf.where(tf.not_equal(rpn_labels, -1)), [-1])
        rpn_cls_category = tf.gather(rpn_cls_category, kept_rpppn)
        acc = tf.reduce_mean(tf.to_float(tf.equal(rpn_cls_category, tf.to_int64(tf.gather(rpn_labels, kept_rpppn)))))
        tf.summary.scalar('ACC/rpn_accuracy', acc)

        # Sample the RoI minibatch only after the anchor labels exist.
        with tf.control_dependencies([rpn_labels]):
            with tf.variable_scope('sample_RCNN_minibatch'):
                rois, labels, bbox_targets = \
                    tf.py_func(proposal_target_layer,
                               [rois, gtboxes_batch],
                               [tf.float32, tf.float32, tf.float32])
                rois = tf.reshape(rois, [-1, 4])
                labels = tf.to_int32(labels)
                labels = tf.reshape(labels, [-1])
                bbox_targets = tf.reshape(bbox_targets, [-1, 4*(cfgs.CLASS_NUM+1)])
                self.add_roi_batch_img_smry(input_img_batch, rois, labels)

    # ------------------------------------------------------------------ #
    #                              Fast-RCNN                             #
    # ------------------------------------------------------------------ #

    # 5. build Fast-RCNN
    bbox_pred, cls_score = self.build_fastrcnn(feature_to_cropped=feature_to_cropped, rois=rois, img_shape=img_shape)
    # bbox_pred shape: [-1, 4*(cls_num+1)].
    # cls_score shape: [-1, cls_num+1]

    cls_prob = slim.softmax(cls_score, 'cls_prob')

    # Fast-RCNN head accuracy summary (training only).
    if self.is_training:
        cls_category = tf.argmax(cls_prob, axis=1)
        fast_acc = tf.reduce_mean(tf.to_float(tf.equal(cls_category, tf.to_int64(labels))))
        tf.summary.scalar('ACC/fast_acc', fast_acc)

    # 6. postprocess_fastrcnn
    if not self.is_training:
        return self.postprocess_fastrcnn(rois=rois, bbox_ppred=bbox_pred, scores=cls_prob, img_shape=img_shape)
    else:
        '''
        when trian. We need build Loss
        '''
        loss_dict = self.build_loss(rpn_box_pred=rpn_box_pred,
                                    rpn_bbox_targets=rpn_bbox_targets,
                                    rpn_cls_score=rpn_cls_score,
                                    rpn_labels=rpn_labels,
                                    bbox_pred=bbox_pred,
                                    bbox_targets=bbox_targets,
                                    cls_score=cls_score,
                                    labels=labels)

        final_bbox, final_scores, final_category = self.postprocess_fastrcnn(rois=rois,
                                                                             bbox_ppred=bbox_pred,
                                                                             scores=cls_prob,
                                                                             img_shape=img_shape)
        return final_bbox, final_scores, final_category, loss_dict
def _interpolate(im, x, y, z, out_size):
    """Trilinear interpolation layer.

    Samples `im` at the given normalized coordinates by gathering the eight
    surrounding voxels and blending them with per-axis linear weights.

    Args:
        im: A 5D tensor of size [num_batch, depth, height, width, num_channels].
            It is the input volume for the transformation layer. Its depth,
            height, width and channel dimensions are read from the static shape.
        x: Inverse coordinate mapping for x, in the [-1, 1] convention.
        y: Inverse coordinate mapping for y, in the [-1, 1] convention.
        z: Inverse coordinate mapping for z, in the [-1, 1] convention.
            NOTE(review): the coordinates are added to the 1-D `base` offsets
            below, so x/y/z presumably arrive flattened to
            [BS * out_depth * out_height * out_width] -- confirm against the
            caller.
        out_size: A sequence (out_depth, out_height, out_width); the values are
            used as Python integers when building `base`.

    Returns:
        The weighted sum of the eight gathered voxel values (tf.float32), with
        the channel dimension last.
    """
    with tf.compat.v1.variable_scope('_interpolate'):
        # NOTE(review): num_batch is read but never used; the batch offsets
        # below are built from the module-level `BS` instead.
        num_batch = im.get_shape().as_list()[0]
        depth = im.get_shape().as_list()[1]
        height = im.get_shape().as_list()[2]
        width = im.get_shape().as_list()[3]
        channels = im.get_shape().as_list()[4]

        x = tf.cast(x, dtype=tf.float32)
        y = tf.cast(y, dtype=tf.float32)
        z = tf.cast(z, dtype=tf.float32)
        depth_f = tf.cast(depth, dtype=tf.float32)
        height_f = tf.cast(height, dtype=tf.float32)
        width_f = tf.cast(width, dtype=tf.float32)
        # Output grid size along each axis.
        out_depth = out_size[0]
        out_height = out_size[1]
        out_width = out_size[2]
        zero = tf.zeros([], dtype='int32')
        # Largest valid index per axis:
        # 0 <= z < depth, 0 <= y < height & 0 <= x < width.
        max_z = tf.cast(tf.shape(input=im)[1] - 1, dtype=tf.int32)
        max_y = tf.cast(tf.shape(input=im)[2] - 1, dtype=tf.int32)
        max_x = tf.cast(tf.shape(input=im)[3] - 1, dtype=tf.int32)

        # Rescale coordinates from [-1, 1] to [0, dim - 1] along each axis.
        x = (x + 1.0) * (width_f - 1.0) / 2.0
        y = (y + 1.0) * (height_f - 1.0) / 2.0
        z = (z + 1.0) * (depth_f - 1.0) / 2.0

        # Integer corners surrounding each sample point.
        x0 = tf.cast(tf.floor(x), dtype=tf.int32)
        x1 = x0 + 1
        y0 = tf.cast(tf.floor(y), dtype=tf.int32)
        y1 = y0 + 1
        z0 = tf.cast(tf.floor(z), dtype=tf.int32)
        z1 = z0 + 1

        # Clipped copies are used only for gathering, so indices stay in range.
        x0_clip = tf.clip_by_value(x0, zero, max_x)
        x1_clip = tf.clip_by_value(x1, zero, max_x)
        y0_clip = tf.clip_by_value(y0, zero, max_y)
        y1_clip = tf.clip_by_value(y1, zero, max_y)
        z0_clip = tf.clip_by_value(z0, zero, max_z)
        z1_clip = tf.clip_by_value(z1, zero, max_z)
        # Strides of the flattened volume: one row, one slice, one batch item.
        dim3 = width
        dim2 = width * height
        dim1 = width * height * depth

        # Per-sample offset of the batch item inside the flattened volume,
        # built as a NumPy constant (an earlier `_repeat`-based version was
        # dropped because it could only run on CPU).
        # NOTE(review): `BS` is not defined in this function; it is presumably
        # a module-level batch-size constant -- confirm it matches `im`.
        base = tf.constant(
            np.concatenate([
                np.array([i * dim1] * out_depth * out_height * out_width)
                for i in range(BS)
            ]).astype(np.int32))

        base_z0_y0 = base + z0_clip * dim2 + y0_clip * dim3
        base_z0_y1 = base + z0_clip * dim2 + y1_clip * dim3
        base_z1_y0 = base + z1_clip * dim2 + y0_clip * dim3
        base_z1_y1 = base + z1_clip * dim2 + y1_clip * dim3

        # Flat indices of the eight corner voxels.
        idx_z0_y0_x0 = base_z0_y0 + x0_clip
        idx_z0_y0_x1 = base_z0_y0 + x1_clip
        idx_z0_y1_x0 = base_z0_y1 + x0_clip
        idx_z0_y1_x1 = base_z0_y1 + x1_clip
        idx_z1_y0_x0 = base_z1_y0 + x0_clip
        idx_z1_y0_x1 = base_z1_y0 + x1_clip
        idx_z1_y1_x0 = base_z1_y1 + x0_clip
        idx_z1_y1_x1 = base_z1_y1 + x1_clip

        # Use indices to lookup pixels in the flat image and restore
        # channels dim
        im_flat = tf.reshape(im, tf.stack([-1, channels]))
        im_flat = tf.cast(im_flat, dtype=tf.float32)
        i_z0_y0_x0 = tf.gather(im_flat, idx_z0_y0_x0)
        i_z0_y0_x1 = tf.gather(im_flat, idx_z0_y0_x1)
        i_z0_y1_x0 = tf.gather(im_flat, idx_z0_y1_x0)
        i_z0_y1_x1 = tf.gather(im_flat, idx_z0_y1_x1)
        i_z1_y0_x0 = tf.gather(im_flat, idx_z1_y0_x0)
        i_z1_y0_x1 = tf.gather(im_flat, idx_z1_y0_x1)
        i_z1_y1_x0 = tf.gather(im_flat, idx_z1_y1_x0)
        i_z1_y1_x1 = tf.gather(im_flat, idx_z1_y1_x1)

        # Finally calculate interpolated values.
        x0_f = tf.cast(x0, dtype=tf.float32)
        x1_f = tf.cast(x1, dtype=tf.float32)
        y0_f = tf.cast(y0, dtype=tf.float32)
        y1_f = tf.cast(y1, dtype=tf.float32)
        z0_f = tf.cast(z0, dtype=tf.float32)
        z1_f = tf.cast(z1, dtype=tf.float32)
        # Check the out-of-boundary case (uses the unclipped corners).
        x0_valid = tf.cast(tf.less_equal(x0, max_x) & tf.greater_equal(x0, 0), dtype=tf.float32)
        x1_valid = tf.cast(tf.less_equal(x1, max_x) & tf.greater_equal(x1, 0), dtype=tf.float32)
        y0_valid = tf.cast(tf.less_equal(y0, max_y) & tf.greater_equal(y0, 0), dtype=tf.float32)
        y1_valid = tf.cast(tf.less_equal(y1, max_y) & tf.greater_equal(y1, 0), dtype=tf.float32)
        z0_valid = tf.cast(tf.less_equal(z0, max_z) & tf.greater_equal(z0, 0), dtype=tf.float32)
        z1_valid = tf.cast(tf.less_equal(z1, max_z) & tf.greater_equal(z1, 0), dtype=tf.float32)

        # NOTE(review): each corner's weight is masked with the validity of
        # the opposite corner on every axis (e.g. the x0 corner is multiplied
        # by x1_valid). That is what the code does -- confirm it is intended.
        w_z0_y0_x0 = tf.expand_dims(
            ((x1_f - x) * (y1_f - y) * (z1_f - z) * x1_valid * y1_valid * z1_valid), 1)
        w_z0_y0_x1 = tf.expand_dims(
            ((x - x0_f) * (y1_f - y) * (z1_f - z) * x0_valid * y1_valid * z1_valid), 1)
        w_z0_y1_x0 = tf.expand_dims(
            ((x1_f - x) * (y - y0_f) * (z1_f - z) * x1_valid * y0_valid * z1_valid), 1)
        w_z0_y1_x1 = tf.expand_dims(
            ((x - x0_f) * (y - y0_f) * (z1_f - z) * x0_valid * y0_valid * z1_valid), 1)
        w_z1_y0_x0 = tf.expand_dims(
            ((x1_f - x) * (y1_f - y) * (z - z0_f) * x1_valid * y1_valid * z0_valid), 1)
        w_z1_y0_x1 = tf.expand_dims(
            ((x - x0_f) * (y1_f - y) * (z - z0_f) * x0_valid * y1_valid * z0_valid), 1)
        w_z1_y1_x0 = tf.expand_dims(
            ((x1_f - x) * (y - y0_f) * (z - z0_f) * x1_valid * y0_valid * z0_valid), 1)
        w_z1_y1_x1 = tf.expand_dims(
            ((x - x0_f) * (y - y0_f) * (z - z0_f) * x0_valid * y0_valid * z0_valid), 1)

        # NOTE(review): weights_summed is computed but never used below.
        weights_summed = (w_z0_y0_x0 + w_z0_y0_x1 + w_z0_y1_x0 + w_z0_y1_x1 +
                          w_z1_y0_x0 + w_z1_y0_x1 + w_z1_y1_x0 + w_z1_y1_x1)

        # Blend the eight corner values with their weights.
        output = tf.add_n([
            w_z0_y0_x0 * i_z0_y0_x0, w_z0_y0_x1 * i_z0_y0_x1,
            w_z0_y1_x0 * i_z0_y1_x0, w_z0_y1_x1 * i_z0_y1_x1,
            w_z1_y0_x0 * i_z1_y0_x0, w_z1_y0_x1 * i_z1_y0_x1,
            w_z1_y1_x0 * i_z1_y1_x0, w_z1_y1_x1 * i_z1_y1_x1
        ])
        return output
def d_tf_elu(x):
    """Return the element-wise derivative of ELU evaluated at `x`.

    The result is 1 where `x >= 0` and `tf_elu(x) + 1` where `x < 0`.
    Assumes `tf_elu` is the standard ELU with unit alpha, so that
    `tf_elu(x) + 1 == exp(x)` on the negative side -- confirm against its
    definition.

    Args:
        x: input tensor; the masks are cast to tf.float32, so `x` is expected
            to be tf.float32 as well.

    Returns:
        A tf.float32 tensor with the same shape as `x`.
    """
    non_negative = tf.cast(tf.greater_equal(x, 0), tf.float32)
    negative = tf.cast(tf.less(x, 0), tf.float32)
    # Masking the input keeps non-negative entries out of the ELU call.
    # Masking the result as well is required: otherwise the negative branch
    # contributes elu(0) + 1 == 1 for x >= 0 and the derivative there is 2.
    return non_negative + negative * (tf_elu(negative * x) + 1.0)


def tf_softmax(x):
    """Apply `tf.nn.softmax` to `x` with the library's default arguments."""
    return tf.nn.softmax(x)
def d_tf_celu(x, alpha=2.0):
    """Return the element-wise derivative of CELU evaluated at `x`.

    The result is 1 where `x >= 0` and `exp(x / alpha)` where `x < 0`.

    Args:
        x: input tensor; the masks are cast to tf.float32, so `x` is expected
            to be tf.float32 as well.
        alpha: the CELU scale parameter.

    Returns:
        A tf.float32 tensor with the same shape as `x`.
    """
    non_negative = tf.cast(tf.greater_equal(x, 0), tf.float32)
    negative = tf.cast(tf.less(x, 0), tf.float32)
    # Zeroing non-negative inputs keeps the exponent from overflowing there.
    negative_slope = tf.exp(tf.divide(negative * x, alpha))
    # The exponential must be masked too: exp(0) == 1 would otherwise be
    # added on top of the unit slope for x >= 0, giving 2 instead of 1.
    return non_negative + negative * negative_slope
def greater_equal_network(self, x):
    """Select `x` where it is at least the fixed column [1, 2, 3, 4], else that column.

    Args:
        x: tensor compared element-wise against the 4x1 float32 constant "y".

    Returns:
        The `tf.where` result named "z1".
    """
    thresholds = tf.constant([[1], [2], [3], [4]], dtype=tf.float32, name="y")
    at_least_threshold = tf.greater_equal(x, thresholds, name="cond")
    return tf.where(at_least_threshold, x, thresholds, name="z1")
def _at_least_x_are_equal(a, b, x):
    """Return a boolean tensor: do `a` and `b` agree in at least `x` positions?

    Args:
        a: first tensor to compare.
        b: second tensor to compare element-wise with `a`.
        x: minimum number of equal elements required.

    Returns:
        A scalar boolean tensor.
    """
    num_equal = tf.reduce_sum(tf.cast(tf.equal(a, b), tf.int32))
    return tf.greater_equal(num_equal, x)
def triplet_loss(features, labels, create_summaries=True):
    """Softmargin triplet loss with hardest-positive / hardest-negative mining.

    See::

        Hermans, Beyer, Leibe: In Defense of the Triplet Loss for Person
        Re-Identification. arXiv, 2017.

    Parameters
    ----------
    features : tf.Tensor
        A matrix of shape NxM that contains the M-dimensional feature vectors
        of N objects (floating type).
    labels : tf.Tensor
        The one-dimensional array of length N that contains for each feature
        the associated class label (integer type).
    create_summaries : Optional[bool]
        If True, creates scalar summaries to monitor training behavior.

    Returns
    -------
    tf.Tensor
        A scalar loss tensor.

    """
    eps = tf.constant(1e-5, tf.float32)
    nil = tf.constant(0., tf.float32)
    almost_inf = tf.constant(1e+10, tf.float32)

    # Pairwise distances; eps keeps the square root away from zero.
    sq_dists = _pdist(features)
    dists = tf.sqrt(tf.maximum(nil, eps + sq_dists))

    # same_class[i, j] is 1.0 when samples i and j share a label.
    labels_col = tf.reshape(labels, (-1, 1))
    labels_row = tf.reshape(labels, (1, -1))
    same_class = tf.cast(tf.equal(labels_col, labels_row), tf.float32)

    # Furthest same-class sample per row.
    hardest_positive = tf.reduce_max(same_class * dists, axis=1)
    # Closest other-class sample per row: same-class entries are pushed out
    # of the way by a huge offset.
    hardest_negative = tf.reduce_min(
        (same_class * almost_inf) + dists, axis=1)
    per_sample_loss = tf.nn.softplus(hardest_positive - hardest_negative)

    if create_summaries:
        invalid_mask = tf.cast(tf.less_equal(sq_dists, -eps), tf.float32)
        tf.summary.scalar(
            "fraction_invalid_pdist", tf.reduce_mean(invalid_mask))

        active_mask = tf.cast(
            tf.greater_equal(per_sample_loss, 1e-5), tf.float32)
        tf.summary.scalar(
            "fraction_active_triplets", tf.reduce_mean(active_mask))

        squared_norms = tf.reduce_sum(tf.square(features), axis=1)
        tf.summary.scalar(
            "mean squared feature norm", tf.reduce_mean(squared_norms))

        tf.summary.scalar("mean feature distance", tf.reduce_mean(dists))
        tf.summary.scalar(
            "mean positive distance", tf.reduce_mean(hardest_positive))
        tf.summary.scalar(
            "mean negative distance", tf.reduce_mean(hardest_negative))

    return tf.reduce_mean(per_sample_loss)
def __init__(self, config, wordEmbedding):
    """Build the text-CNN graph: embedding, parallel conv/max-pool branches, dropout, output and loss.

    Args:
        config: configuration object; this method reads `sequenceLength`,
            `numClasses`, and `model.filterSizes`, `model.embeddingSize`,
            `model.numFilters`, `model.l2RegLambda`.
        wordEmbedding: initial value of the embedding matrix; it is cast to
            float32 and wrapped in a `tf.Variable`.

    Only `numClasses == 1` and `numClasses > 1` are handled; for any other
    value `predictions` and the loss are never assigned.
    """
    # Model inputs.
    self.inputX = tf.placeholder(tf.int32, [None, config.sequenceLength], name="inputX")
    self.inputY = tf.placeholder(tf.int32, [None], name="inputY")

    self.dropoutKeepProb = tf.placeholder(tf.float32, name="dropoutKeepProb")

    # Accumulator for the L2 regularization term.
    l2Loss = tf.constant(0.0)

    # Word-embedding layer.
    with tf.name_scope("embedding"):
        # Initialize the embedding matrix from the supplied word vectors.
        self.W = tf.Variable(tf.cast(wordEmbedding, dtype=tf.float32, name="word2vec"), name="W")
        # Map the input token ids to word vectors:
        # [batch_size, sequence_length, embedding_size].
        self.embeddedWords = tf.nn.embedding_lookup(self.W, self.inputX)
        # conv2d takes a 4-D input, so append a trailing channel dimension
        # with tf.expand_dims.
        self.embeddedWordsExpanded = tf.expand_dims(self.embeddedWords, -1)
        print('CCCCCCCCCCCC', self.embeddedWordsExpanded.shape)  # e.g. (?, 200, 200, 1)

    # Convolution + pooling branches.
    pooledOutputs = []
    # One single-layer convolution branch per filter size (e.g. 3, 4, 5); the
    # branch outputs are concatenated further down.
    for i, filterSize in enumerate(config.model.filterSizes):
        with tf.name_scope('conv-maxpool-%s' % filterSize):
            # Convolution layer: kernel size is filterSize x embeddingSize,
            # with numFilters kernels.
            # Initialize the weight matrix and the bias.
            filterShape = [
                filterSize, config.model.embeddingSize, 1,
                config.model.numFilters
            ]
            print('filterShape.shape', filterShape)  # e.g. [2, 200, 1, 128]
            W = tf.Variable(tf.truncated_normal(filterShape, stddev=0.1), name='W')
            b = tf.Variable(tf.constant(0.1, shape=[config.model.numFilters]), name='b')
            conv = tf.nn.conv2d(self.embeddedWordsExpanded,
                                W,
                                strides=[1, 1, 1, 1],
                                padding='VALID',
                                name='conv')

            # ReLU non-linearity.
            h = tf.nn.relu(tf.nn.bias_add(conv, b), name='relu')
            print('AAAAAAAAAAA', h.shape)  # e.g. (?, 199, 1, 128)

            # Max pooling: the window spans the whole convolved sequence, so
            # one maximum is kept per filter.
            pooled = tf.nn.max_pool(
                h,
                # ksize shape: [batch, height, width, channels]
                ksize=[1, config.sequenceLength - filterSize + 1, 1, 1],
                strides=[1, 1, 1, 1],
                padding='VALID',
                name="pool")
            print('BBBAAAAAAAA', pooled.shape)  # e.g. (?, 1, 1, 128)
            # Collect the output of every filter size.
            pooledOutputs.append(pooled)
    print('pooledOutputs.shape', pooledOutputs)

    # Total number of CNN output features.
    numFiltersTotal = config.model.numFilters * len(
        config.model.filterSizes)

    # The pooled shapes match, so concatenate along the channel axis.
    self.hPool = tf.concat(pooledOutputs, 3)
    print('DDDDDDDDDDD', self.hPool.shape)
    # Flatten to 2-D for the fully connected layer.
    self.hPoolFlat = tf.reshape(self.hPool, [-1, numFiltersTotal])

    # dropout
    with tf.name_scope("dropout"):
        self.hDrop = tf.nn.dropout(self.hPoolFlat, self.dropoutKeepProb)

    # Fully connected output layer.
    with tf.name_scope('output'):
        outputW = tf.get_variable(
            'outputW',
            shape=[numFiltersTotal, config.numClasses],
            initializer=tf.contrib.layers.xavier_initializer())
        outputB = tf.Variable(tf.constant(0.1, shape=[config.numClasses]), name="outputB")
        l2Loss += tf.nn.l2_loss(outputW)
        l2Loss += tf.nn.l2_loss(outputB)
        self.logits = tf.nn.xw_plus_b(self.hDrop, outputW, outputB, name='logits')
        # Binary case: threshold the single logit at 0; multi-class: argmax.
        if config.numClasses == 1:
            self.predictions = tf.cast(tf.greater_equal(self.logits, 0.0), tf.int32, name='predictions')
        elif config.numClasses > 1:
            self.predictions = tf.argmax(self.logits, axis=-1, name="predictions")

        print(self.predictions)

    # Cross-entropy loss (sigmoid for one class, sparse softmax otherwise).
    with tf.name_scope("loss"):
        if config.numClasses == 1:
            losses = tf.nn.sigmoid_cross_entropy_with_logits(
                logits=self.logits,
                labels=tf.cast(tf.reshape(self.inputY, [-1, 1]), dtype=tf.float32))
        elif config.numClasses > 1:
            losses = tf.nn.sparse_softmax_cross_entropy_with_logits(
                logits=self.logits, labels=self.inputY)

        self.loss = tf.reduce_mean(
            losses) + config.model.l2RegLambda * l2Loss
def where_network(self, x):
    """Select `x` where it is at least 1, else 1, against a 4x1 column of ones.

    Args:
        x: tensor compared element-wise against the 4x1 float32 constant "y".

    Returns:
        The `tf.where` result named "z1".
    """
    ones_column = tf.constant([[1], [1], [1], [1]], dtype=tf.float32, name='y')
    at_least_one = tf.greater_equal(x, ones_column, name='cond')
    return tf.where(at_least_one, x, ones_column, name="z1")
def convert_predictions_to_binary(preds, thres):
    """Binarize `preds`: 1.0 where `preds >= thres`, 0.0 elsewhere.

    Args:
        preds: tensor of prediction scores.
        thres: threshold compared element-wise against `preds`.

    Returns:
        A tf.float32 tensor of zeros and ones.
    """
    meets_threshold = tf.greater_equal(preds, thres)
    return tf.cast(meets_threshold, tf.float32)
def __ge__(self, other):
    """Implement `self >= other` as `tf.greater_equal` on the wrapped `_q` value."""
    wrapped = self._q
    return tf.greater_equal(wrapped, other)