Example #1
    def _calculate(self):
        # On TPU we strive to stack tensors together and perform ops once on the
        # entire stack, to save time and HBM memory. We thus stack the batch-of-
        # first-frames and the batch-of-second-frames, for both depth and RGB. The
        # batch dimension of rgb_stack and gt_depth_stack is thus twice the
        # original batch size.

        # Create stacks for features that need to be scaled into pyramids for
        # multi-scale training.
        rgb_stack_ = tf.concat(self._endpoints['rgb'], axis=0)
        flipped_rgb_stack_ = tf.concat(self._endpoints['rgb'][::-1], axis=0)
        predicted_depth_stack_ = tf.concat(self._endpoints['predicted_depth'],
                                           axis=0)
        flipped_predicted_depth_stack_ = tf.concat(
            self._endpoints['predicted_depth'][::-1], axis=0)
        residual_translation_ = tf.concat(
            self._endpoints['residual_translation'], axis=0)
        flipped_residual_translation_ = tf.concat(
            self._endpoints['residual_translation'][::-1], axis=0)
        intrinsics_mat_ = tf.concat(self._endpoints['intrinsics_mat'], axis=0)

        # Create pyramids from each stack to support multi-scale training.
        num_scales = self._params.num_scales
        rgb_pyramid = _get_pyramid(rgb_stack_, num_scales=num_scales)
        flipped_rgb_pyramid = _get_pyramid(flipped_rgb_stack_,
                                           num_scales=num_scales)
        predicted_depth_pyramid = _get_pyramid(predicted_depth_stack_,
                                               num_scales=num_scales)
        flipped_predicted_depth_pyramid = _get_pyramid(
            flipped_predicted_depth_stack_, num_scales=num_scales)
        residual_translation_pyramid = _get_pyramid(residual_translation_,
                                                    num_scales=num_scales)
        flipped_residual_translation_pyramid = _get_pyramid(
            flipped_residual_translation_, num_scales=num_scales)
        intrinsics_mat_pyramid = _get_intrinsics_mat_pyramid(
            intrinsics_mat_, num_scales=num_scales)
        validity_mask_ = self._endpoints.get('validity_mask')
        if validity_mask_ is not None:
            validity_mask_ = tf.concat(validity_mask_, axis=0)
            validity_mask_pyramid = _get_pyramid(validity_mask_, num_scales,
                                                 _min_pool2d)
        else:
            validity_mask_pyramid = [None] * num_scales

        if 'groundtruth_depth' in self._endpoints:
            gt_depth_stack_ = tf.concat(self._endpoints['groundtruth_depth'],
                                        axis=0)
            gt_depth_pyramid = _get_pyramid(gt_depth_stack_,
                                            num_scales=num_scales)
            if 'groundtruth_depth_weight' in self._endpoints:
                gt_depth_weight_stack_ = tf.concat(
                    self._endpoints['groundtruth_depth_weight'], axis=0)
            else:
                gt_depth_weight_stack_ = tf.cast(
                    tf.greater(gt_depth_stack_, 0.2), tf.float32)
            gt_depth_weight_pyramid = _get_pyramid(gt_depth_weight_stack_,
                                                   num_scales=num_scales)

            if 'groundtruth_depth_filter' in self._endpoints:
                depth_filter_ = tf.concat(
                    self._endpoints['groundtruth_depth_filter'], axis=0)
                depth_filter_ = tf.cast(depth_filter_, tf.float32)
                depth_filter_pyramid = _get_pyramid(depth_filter_,
                                                    num_scales=num_scales)

        # Calculate losses at each scale.  Iterate in reverse so that the final
        # output values are set at scale 0.
        for s in reversed(range(self._params.num_scales)):
            # Weight applied to all losses at this scale.
            scale_w = 1.0 / 2**s

            rgb_stack = rgb_pyramid[s]
            predicted_depth_stack = predicted_depth_pyramid[s]
            flipped_predicted_depth_stack = flipped_predicted_depth_pyramid[s]

            if 'groundtruth_depth' in self._endpoints:
                gt_depth_stack = gt_depth_pyramid[s]
                depth_error = tf.abs(gt_depth_stack - predicted_depth_stack)

                # Weigh the spatial loss if a weight map is provided. Otherwise, revert
                # to original behavior.
                gt_depth_weight_stack = gt_depth_weight_pyramid[s]
                depth_error = depth_error * gt_depth_weight_stack

                # Optionally filter the depth map if a boolean depth filter is provided.
                # We use a TPU-friendly equivalent of tf.boolean_mask.
                depth_filter = tf.ones_like(depth_error, tf.float32)
                if 'groundtruth_depth_filter' in self._endpoints:
                    depth_filter = depth_filter_pyramid[s]

                self._losses['depth_supervision'] += scale_w * tf.reduce_mean(
                    depth_error * depth_filter) / tf.reduce_mean(depth_filter)

            # In theory, the training losses should be agnostic to the global scale of
            # the predicted depth. However, in reality second-order effects can lead to
            # diverging modes
            # (https://en.wikipedia.org/wiki/Von_Neumann_stability_analysis). For some
            # reason this happens when training on TPU. Since the scale is immaterial
            # anyway, we normalize it out, and the training stabilizes.
            #
            # Note that the depth supervision term, which is sensitive to the scale,
            # was applied before this normalization. Therefore the scale of the depth
            # is learned.
            mean_depth = tf.reduce_mean(predicted_depth_stack)

            # When training starts, the depth sometimes tends to collapse to a
            # constant value, which seems to be a fixed point where the training can
            # get stuck. To discourage this collapse, we penalize the reciprocal of
            # the variance with a tiny weight. Note that the mean of
            # predicted_depth / mean_depth is one, hence we subtract 1.0.
            depth_var = tf.reduce_mean(
                tf.square(predicted_depth_stack / mean_depth - 1.0))
            self._losses['depth_variance'] = scale_w * 1.0 / depth_var

            if self._params.scale_normalization:
                predicted_depth_stack /= mean_depth
                flipped_predicted_depth_stack /= mean_depth

            disp = 1.0 / predicted_depth_stack

            mean_disp = tf.reduce_mean(disp, axis=[1, 2, 3], keep_dims=True)
            self._losses['depth_smoothing'] += (
                scale_w * regularizers.joint_bilateral_smoothing(
                    disp / mean_disp, rgb_stack))
            self._output_endpoints['disparity'] = disp

            flipped_rgb_stack = flipped_rgb_pyramid[s]

            background_translation = tf.concat(
                self._endpoints['background_translation'], axis=0)
            flipped_background_translation = tf.concat(
                self._endpoints['background_translation'][::-1], axis=0)
            residual_translation = residual_translation_pyramid[s]
            flipped_residual_translation = flipped_residual_translation_pyramid[
                s]
            if self._params.scale_normalization:
                background_translation /= mean_depth
                flipped_background_translation /= mean_depth
                residual_translation /= mean_depth
                flipped_residual_translation /= mean_depth
            translation = residual_translation + background_translation
            flipped_translation = (flipped_residual_translation +
                                   flipped_background_translation)

            rotation = tf.concat(self._endpoints['rotation'], axis=0)
            flipped_rotation = tf.concat(self._endpoints['rotation'][::-1],
                                         axis=0)
            intrinsics_mat = intrinsics_mat_pyramid[s]
            intrinsics_mat_inv = intrinsics_utils.invert_intrinsics_matrix(
                intrinsics_mat)
            validity_mask = validity_mask_pyramid[s]

            transformed_depth = transform_depth_map.using_motion_vector(
                tf.squeeze(predicted_depth_stack, axis=-1), translation,
                rotation, intrinsics_mat, intrinsics_mat_inv)
            flipped_predicted_depth_stack = tf.squeeze(
                flipped_predicted_depth_stack, axis=-1)
            if self._params.target_depth_stop_gradient:
                flipped_predicted_depth_stack = tf.stop_gradient(
                    flipped_predicted_depth_stack)
            # The first and second halves of the batch now contain Frame1's and
            # Frame2's depths transformed onto Frame2 and Frame1 respectively. To
            # demand consistency, we need to `flip` `predicted_depth` as well.
            loss_endpoints = (
                consistency_losses.rgbd_and_motion_consistency_loss(
                    transformed_depth,
                    rgb_stack,
                    flipped_predicted_depth_stack,
                    flipped_rgb_stack,
                    rotation,
                    translation,
                    flipped_rotation,
                    flipped_translation,
                    validity_mask=validity_mask))

            normalized_trans = regularizers.normalize_motion_map(
                residual_translation, translation)
            self._losses[
                'motion_smoothing'] += scale_w * regularizers.l1smoothness(
                    normalized_trans, self._weights.motion_drift == 0)
            self._losses[
                'motion_drift'] += scale_w * regularizers.sqrt_sparsity(
                    normalized_trans)
            self._losses['depth_consistency'] += (
                scale_w * loss_endpoints['depth_error'])
            self._losses[
                'rgb_consistency'] += scale_w * loss_endpoints['rgb_error']
            self._losses[
                'ssim'] += scale_w * 0.5 * loss_endpoints['ssim_error']

            self._losses['rotation_cycle_consistency'] += (
                scale_w * loss_endpoints['rotation_error'])
            self._losses['translation_cycle_consistency'] += (
                scale_w * loss_endpoints['translation_error'])

            self._output_endpoints['depth_proximity_weight'] = loss_endpoints[
                'depth_proximity_weight']
            self._output_endpoints['trans'] = translation
            self._output_endpoints['inv_trans'] = flipped_translation

        for k, w in self._weights.as_dict().items():
            # multiply by 2 to match the scale of the old code.
            self._losses[k] *= w * 2

        if tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES):
            self._losses[tf.GraphKeys.REGULARIZATION_LOSSES] = tf.add_n(
                tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES))
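The comments in the loop above describe normalizing out the global depth scale and penalizing the reciprocal of the depth variance to discourage collapse; a minimal standalone sketch of that computation on a dummy tensor (illustrative shapes and names, not part of the original class):

import tensorflow.compat.v1 as tf

predicted_depth = tf.random.uniform([4, 32, 32, 1], minval=0.5, maxval=2.0)
mean_depth = tf.reduce_mean(predicted_depth)
# After normalization the mean of normalized_depth is 1.0, so the variance
# below is computed around 1.0.
normalized_depth = predicted_depth / mean_depth
depth_var = tf.reduce_mean(tf.square(normalized_depth - 1.0))
depth_variance_loss = 1.0 / depth_var  # penalizes collapse to a constant depth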
Example #2
def box_matching(boxes, gt_boxes, gt_classes):
    """Match boxes to groundtruth boxes.

  Given the proposal boxes and the groundtruth boxes and classes, perform the
  groundtruth matching by taking the argmax of the IoU between boxes and
  groundtruth boxes.

  Args:
    boxes: a tensor of shape of [batch_size, N, 4] representing the box
      coordinates to be matched to groundtruth boxes.
    gt_boxes: a tensor of shape of [batch_size, MAX_INSTANCES, 4] representing
      the groundtruth box coordinates. It is padded with -1s to indicate the
      invalid boxes.
    gt_classes: [batch_size, MAX_INSTANCES] representing the groundtruth box
      classes. It is padded with -1s to indicate the invalid classes.

  Returns:
    matched_gt_boxes: a tensor of shape of [batch_size, N, 4], representing
      the matched groundtruth box coordinates for each input box. If the box
      does not overlap with any groundtruth boxes, the matched boxes of it
      will be set to all 0s.
    matched_gt_classes: a tensor of shape of [batch_size, N], representing
      the matched groundtruth classes for each input box. If the box does not
      overlap with any groundtruth boxes, the matched box classes of it will
      be set to 0, which corresponds to the background class.
    matched_gt_indices: a tensor of shape of [batch_size, N], representing
      the indices of the matched groundtruth boxes in the original gt_boxes
      tensor. If the box does not overlap with any groundtruth boxes, the
      index of the matched groundtruth will be set to -1.
    matched_iou: a tensor of shape of [batch_size, N], representing the IoU
      between the box and its matched groundtruth box. The matched IoU is the
      maximum IoU of the box and all the groundtruth boxes.
    iou: a tensor of shape of [batch_size, N, K], representing the IoU matrix
      between boxes and the groundtruth boxes. The IoU between a box and the
      invalid groundtruth boxes whose coordinates are [-1, -1, -1, -1] is -1.
  """
    # Compute IoU between boxes and gt_boxes.
    # iou <- [batch_size, N, K]
    iou = box_utils.bbox_overlap(boxes, gt_boxes)

    # max_iou <- [batch_size, N]
    # 0.0 -> no match to gt, or -1.0 match to no gt
    matched_iou = tf.reduce_max(iou, axis=-1)

    # background_box_mask <- bool, [batch_size, N]
    background_box_mask = tf.less_equal(matched_iou, 0.0)

    argmax_iou_indices = tf.argmax(iou, axis=-1, output_type=tf.int32)

    argmax_iou_indices_shape = tf.shape(argmax_iou_indices)
    batch_indices = (
        tf.expand_dims(tf.range(argmax_iou_indices_shape[0]), axis=-1) *
        tf.ones([1, argmax_iou_indices_shape[-1]], dtype=tf.int32))
    gather_nd_indices = tf.stack([batch_indices, argmax_iou_indices], axis=-1)

    matched_gt_boxes = tf.gather_nd(gt_boxes, gather_nd_indices)
    matched_gt_boxes = tf.where(
        tf.tile(tf.expand_dims(background_box_mask, axis=-1), [1, 1, 4]),
        tf.zeros_like(matched_gt_boxes, dtype=tf.float32), matched_gt_boxes)

    matched_gt_classes = tf.gather_nd(gt_classes, gather_nd_indices)
    matched_gt_classes = tf.where(background_box_mask,
                                  tf.zeros_like(matched_gt_classes),
                                  matched_gt_classes)

    matched_gt_indices = tf.where(background_box_mask,
                                  -tf.ones_like(argmax_iou_indices),
                                  argmax_iou_indices)

    return (matched_gt_boxes, matched_gt_classes, matched_gt_indices,
            matched_iou, iou)
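A hedged usage sketch of box_matching with toy shapes matching the docstring; it assumes box_utils.bbox_overlap from the surrounding codebase is importable, and the values are illustrative:

import tensorflow.compat.v1 as tf

boxes = tf.constant([[[0., 0., 10., 10.], [20., 20., 30., 30.]]])    # [1, N=2, 4]
gt_boxes = tf.constant([[[0., 0., 8., 8.], [-1., -1., -1., -1.]]])   # [1, MAX_INSTANCES=2, 4]
gt_classes = tf.constant([[3, -1]])                                  # [1, MAX_INSTANCES=2]
(matched_gt_boxes, matched_gt_classes, matched_gt_indices,
 matched_iou, iou) = box_matching(boxes, gt_boxes, gt_classes)
# The second proposal overlaps no valid groundtruth box, so it is matched to
# the background: class 0, index -1, and an all-zero matched box.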
    def decode(self, tf_example_string_tensor):
        """Decodes serialized tensorflow example and returns a tensor dictionary.

    Args:
      tf_example_string_tensor: a string tensor holding a serialized tensorflow
        example proto.

    Returns:
      A dictionary of the following tensors.
      fields.InputDataFields.image - 3D uint8 tensor of shape [None, None, 3]
        containing image.
      fields.InputDataFields.original_image_spatial_shape - 1D int32 tensor of
        shape [2] containing shape of the image.
      fields.InputDataFields.source_id - string tensor containing original
        image id.
      fields.InputDataFields.key - string tensor with unique sha256 hash key.
      fields.InputDataFields.filename - string tensor with original dataset
        filename.
      fields.InputDataFields.groundtruth_boxes - 2D float32 tensor of shape
        [None, 4] containing box corners.
      fields.InputDataFields.groundtruth_classes - 1D int64 tensor of shape
        [None] containing classes for the boxes.
      fields.InputDataFields.groundtruth_weights - 1D float32 tensor of
        shape [None] indicating the weights of groundtruth boxes.
      fields.InputDataFields.groundtruth_area - 1D float32 tensor of shape
        [None] containing object mask area in pixels squared.
      fields.InputDataFields.groundtruth_is_crowd - 1D bool tensor of shape
        [None] indicating if the boxes enclose a crowd.

    Optional:
      fields.InputDataFields.groundtruth_image_confidences - 1D float tensor of
        shape [None] indicating if a class is present in the image (1.0) or
        a class is not present in the image (0.0).
      fields.InputDataFields.image_additional_channels - 3D uint8 tensor of
        shape [None, None, num_additional_channels]. 1st dim is height; 2nd dim
        is width; 3rd dim is the number of additional channels.
      fields.InputDataFields.groundtruth_difficult - 1D bool tensor of shape
        [None] indicating if the boxes represent `difficult` instances.
      fields.InputDataFields.groundtruth_group_of - 1D bool tensor of shape
        [None] indicating if the boxes represent `group_of` instances.
      fields.InputDataFields.groundtruth_keypoints - 3D float32 tensor of
        shape [None, num_keypoints, 2] containing keypoints, where the
        coordinates of the keypoints are ordered (y, x).
      fields.InputDataFields.groundtruth_keypoint_visibilities - 2D bool
        tensor of shape [None, num_keypoints] containing keypoint visibilities.
      fields.InputDataFields.groundtruth_instance_masks - 3D float32 tensor of
        shape [None, None, None] containing instance masks.
      fields.InputDataFields.groundtruth_image_classes - 1D int64 tensor of shape
        [None] containing classes for the boxes.
      fields.InputDataFields.multiclass_scores - 1D float32 tensor of shape
        [None * num_classes] containing flattened multiclass scores for
        groundtruth boxes.
      fields.InputDataFields.context_features - 1D float32 tensor of shape
        [context_feature_length * num_context_features]
      fields.InputDataFields.context_feature_length - int32 tensor specifying
        the length of each feature in context_features
    """
        serialized_example = tf.reshape(tf_example_string_tensor, shape=[])
        decoder = slim_example_decoder.TFExampleDecoder(
            self.keys_to_features, self.items_to_handlers)
        keys = decoder.list_items()
        tensors = decoder.decode(serialized_example, items=keys)
        tensor_dict = dict(zip(keys, tensors))
        is_crowd = fields.InputDataFields.groundtruth_is_crowd
        tensor_dict[is_crowd] = tf.cast(tensor_dict[is_crowd], dtype=tf.bool)
        tensor_dict[fields.InputDataFields.image].set_shape([None, None, 3])
        tensor_dict[
            fields.InputDataFields.original_image_spatial_shape] = tf.shape(
                tensor_dict[fields.InputDataFields.image])[:2]

        if fields.InputDataFields.image_additional_channels in tensor_dict:
            channels = tensor_dict[
                fields.InputDataFields.image_additional_channels]
            channels = tf.squeeze(channels, axis=3)
            channels = tf.transpose(channels, perm=[1, 2, 0])
            tensor_dict[
                fields.InputDataFields.image_additional_channels] = channels

        def default_groundtruth_weights():
            return tf.ones([
                tf.shape(
                    tensor_dict[fields.InputDataFields.groundtruth_boxes])[0]
            ],
                           dtype=tf.float32)

        tensor_dict[fields.InputDataFields.groundtruth_weights] = tf.cond(
            tf.greater(
                tf.shape(tensor_dict[
                    fields.InputDataFields.groundtruth_weights])[0], 0),
            lambda: tensor_dict[fields.InputDataFields.groundtruth_weights],
            default_groundtruth_weights)

        if fields.InputDataFields.groundtruth_keypoints in tensor_dict:
            # Set all keypoints that are not labeled to NaN.
            gt_kpt_fld = fields.InputDataFields.groundtruth_keypoints
            gt_kpt_vis_fld = fields.InputDataFields.groundtruth_keypoint_visibilities
            visibilities_tiled = tf.tile(
                tf.expand_dims(tensor_dict[gt_kpt_vis_fld], -1), [1, 1, 2])
            tensor_dict[gt_kpt_fld] = tf.where(
                visibilities_tiled, tensor_dict[gt_kpt_fld],
                np.nan * tf.ones_like(tensor_dict[gt_kpt_fld]))

        if self._expand_hierarchy_labels:
            input_fields = fields.InputDataFields
            image_classes, image_confidences = self._expand_image_label_hierarchy(
                tensor_dict[input_fields.groundtruth_image_classes],
                tensor_dict[input_fields.groundtruth_image_confidences])
            tensor_dict[input_fields.groundtruth_image_classes] = image_classes
            tensor_dict[input_fields.groundtruth_image_confidences] = (
                image_confidences)

            box_fields = [
                fields.InputDataFields.groundtruth_group_of,
                fields.InputDataFields.groundtruth_is_crowd,
                fields.InputDataFields.groundtruth_difficult,
                fields.InputDataFields.groundtruth_area,
                fields.InputDataFields.groundtruth_boxes,
                fields.InputDataFields.groundtruth_weights,
            ]

            def expand_field(field_name):
                return self._expansion_box_field_labels(
                    tensor_dict[input_fields.groundtruth_classes],
                    tensor_dict[field_name])

            # pylint: disable=cell-var-from-loop
            for field in box_fields:
                if field in tensor_dict:
                    tensor_dict[field] = tf.cond(
                        tf.size(tensor_dict[field]) > 0,
                        lambda: expand_field(field),
                        lambda: tensor_dict[field])
            # pylint: enable=cell-var-from-loop

            tensor_dict[input_fields.groundtruth_classes] = (
                self._expansion_box_field_labels(
                    tensor_dict[input_fields.groundtruth_classes],
                    tensor_dict[input_fields.groundtruth_classes], True))

        if fields.InputDataFields.groundtruth_group_of in tensor_dict:
            group_of = fields.InputDataFields.groundtruth_group_of
            tensor_dict[group_of] = tf.cast(tensor_dict[group_of],
                                            dtype=tf.bool)

        if fields.InputDataFields.groundtruth_dp_num_points in tensor_dict:
            tensor_dict[
                fields.InputDataFields.groundtruth_dp_num_points] = tf.cast(
                    tensor_dict[
                        fields.InputDataFields.groundtruth_dp_num_points],
                    dtype=tf.int32)
            tensor_dict[
                fields.InputDataFields.groundtruth_dp_part_ids] = tf.cast(
                    tensor_dict[
                        fields.InputDataFields.groundtruth_dp_part_ids],
                    dtype=tf.int32)

        if fields.InputDataFields.groundtruth_track_ids in tensor_dict:
            tensor_dict[
                fields.InputDataFields.groundtruth_track_ids] = tf.cast(
                    tensor_dict[fields.InputDataFields.groundtruth_track_ids],
                    dtype=tf.int32)

        return tensor_dict
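A hedged usage sketch for decode; it assumes `decoder` is an instance of the enclosing decoder class (e.g. the Object Detection API's TfExampleDecoder), that `fields` is imported as in the method body, and that the file path is illustrative:

import tensorflow.compat.v1 as tf

dataset = tf.data.TFRecordDataset('/path/to/examples.tfrecord')  # illustrative path
serialized = tf.data.make_one_shot_iterator(dataset).get_next()
tensor_dict = decoder.decode(serialized)
image = tensor_dict[fields.InputDataFields.image]                # uint8, [H, W, 3]
boxes = tensor_dict[fields.InputDataFields.groundtruth_boxes]    # float32, [N, 4]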
Example #4
def to_homogenous(tensor):
    one = tf.ones_like(tensor[Ellipsis, :1])
    return tf.concat([tensor, one], -1)
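For instance, applied to a batch of one 3-D point this appends a trailing one:

points = tf.constant([[1.0, 2.0, 3.0]])   # shape [1, 3]
points_h = to_homogenous(points)          # shape [1, 4]: [[1., 2., 3., 1.]]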
Example #5
        def metric_fn(**kwargs):
            """Returns a dictionary that has the evaluation metrics."""
            if params['nms_configs'].get('pyfunc', True):
                detections_bs = []
                nms_configs = params['nms_configs']
                for index in range(kwargs['boxes'].shape[0]):
                    detections = tf.numpy_function(
                        functools.partial(nms_np.per_class_nms,
                                          nms_configs=nms_configs),
                        [
                            kwargs['boxes'][index],
                            kwargs['scores'][index],
                            kwargs['classes'][index],
                            tf.slice(kwargs['image_ids'], [index], [1]),
                            tf.slice(kwargs['image_scales'], [index], [1]),
                            params['num_classes'],
                            nms_configs['max_output_size'],
                        ], tf.float32)
                    detections_bs.append(detections)
                detections_bs = postprocess.transform_detections(
                    tf.stack(detections_bs))
            else:
                # These two branches should be equivalent, but currently they are not.
                # TODO(tanmingxing): enable the non_pyfunc path after the bug fix.
                nms_boxes, nms_scores, nms_classes, _ = postprocess.per_class_nms(
                    params, kwargs['boxes'], kwargs['scores'],
                    kwargs['classes'], kwargs['image_scales'])
                img_ids = tf.cast(tf.expand_dims(kwargs['image_ids'], -1),
                                  nms_scores.dtype)
                detections_bs = [
                    img_ids * tf.ones_like(nms_scores),
                    nms_boxes[:, :, 1],
                    nms_boxes[:, :, 0],
                    nms_boxes[:, :, 3] - nms_boxes[:, :, 1],
                    nms_boxes[:, :, 2] - nms_boxes[:, :, 0],
                    nms_scores,
                    nms_classes,
                ]
                detections_bs = tf.stack(detections_bs,
                                         axis=-1,
                                         name='detections')

            if params.get('testdev_dir', None):
                logging.info('Eval testdev_dir %s', params['testdev_dir'])
                eval_metric = coco_metric.EvaluationMetric(
                    testdev_dir=params['testdev_dir'])
                coco_metrics = eval_metric.estimator_metric_fn(
                    detections_bs, tf.zeros([1]))
            else:
                logging.info('Eval val with groundtruths %s.',
                             params['val_json_file'])
                eval_metric = coco_metric.EvaluationMetric(
                    filename=params['val_json_file'],
                    label_map=params['label_map'])
                coco_metrics = eval_metric.estimator_metric_fn(
                    detections_bs, kwargs['groundtruth_data'])

            # Add metrics to output.
            cls_loss = tf.metrics.mean(kwargs['cls_loss_repeat'])
            box_loss = tf.metrics.mean(kwargs['box_loss_repeat'])
            output_metrics = {
                'cls_loss': cls_loss,
                'box_loss': box_loss,
            }
            output_metrics.update(coco_metrics)
            return output_metrics
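The non-pyfunc branch above stacks detections with COCO-style box columns; a minimal sketch of the [ymin, xmin, ymax, xmax] to [xmin, ymin, width, height] rearrangement it performs, on a dummy box:

import tensorflow.compat.v1 as tf

box = tf.constant([[10.0, 20.0, 50.0, 100.0]])    # [ymin, xmin, ymax, xmax]
coco_box = tf.stack(
    [box[:, 1], box[:, 0], box[:, 3] - box[:, 1], box[:, 2] - box[:, 0]],
    axis=-1)                                       # [xmin, ymin, width, height]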
    def build_inpaint_net(self, x, mask, reuse=False,
                          training=True, padding='SAME', name='inpaint_net'):
        """Inpaint network.

        Args:
            x: incomplete image, [-1, 1]
            mask: mask region {0, 1}
        Returns:
            [-1, 1] as predicted image
        """
        xin = x
        offset_flow = None
        ones_x = tf.ones_like(x)[:, :, :, 0:1]
        x = tf.concat([x, ones_x, ones_x*mask], axis=3)

        # two stage network
        cnum = 48
        with tf.compat.v1.variable_scope(name, reuse=reuse), \
                arg_scope([gen_conv, gen_deconv],
                          training=training, padding=padding):
            # stage1
            x = gen_conv(x, cnum, 5, 1, name='conv1')
            x = gen_conv(x, 2*cnum, 3, 2, name='conv2_downsample')
            x = gen_conv(x, 2*cnum, 3, 1, name='conv3')
            x = gen_conv(x, 4*cnum, 3, 2, name='conv4_downsample')
            x = gen_conv(x, 4*cnum, 3, 1, name='conv5')
            x = gen_conv(x, 4*cnum, 3, 1, name='conv6')
            mask_s = resize_mask_like(mask, x)
            x = gen_conv(x, 4*cnum, 3, rate=2, name='conv7_atrous')
            x = gen_conv(x, 4*cnum, 3, rate=4, name='conv8_atrous')
            x = gen_conv(x, 4*cnum, 3, rate=8, name='conv9_atrous')
            x = gen_conv(x, 4*cnum, 3, rate=16, name='conv10_atrous')
            x = gen_conv(x, 4*cnum, 3, 1, name='conv11')
            x = gen_conv(x, 4*cnum, 3, 1, name='conv12')
            x = gen_deconv(x, 2*cnum, name='conv13_upsample')
            x = gen_conv(x, 2*cnum, 3, 1, name='conv14')
            x = gen_deconv(x, cnum, name='conv15_upsample')
            x = gen_conv(x, cnum//2, 3, 1, name='conv16')
            x = gen_conv(x, 3, 3, 1, activation=None, name='conv17')
            x = tf.nn.tanh(x)
            x_stage1 = x

            # stage2, paste result as input
            x = x*mask + xin[:, :, :, 0:3]*(1.-mask)
            x.set_shape(xin[:, :, :, 0:3].get_shape().as_list())
            # conv branch
            # xnow = tf.concat([x, ones_x, ones_x*mask], axis=3)
            xnow = x
            x = gen_conv(xnow, cnum, 5, 1, name='xconv1')
            x = gen_conv(x, cnum, 3, 2, name='xconv2_downsample')
            x = gen_conv(x, 2*cnum, 3, 1, name='xconv3')
            x = gen_conv(x, 2*cnum, 3, 2, name='xconv4_downsample')
            x = gen_conv(x, 4*cnum, 3, 1, name='xconv5')
            x = gen_conv(x, 4*cnum, 3, 1, name='xconv6')
            x = gen_conv(x, 4*cnum, 3, rate=2, name='xconv7_atrous')
            x = gen_conv(x, 4*cnum, 3, rate=4, name='xconv8_atrous')
            x = gen_conv(x, 4*cnum, 3, rate=8, name='xconv9_atrous')
            x = gen_conv(x, 4*cnum, 3, rate=16, name='xconv10_atrous')
            x_hallu = x
            # attention branch
            x = gen_conv(xnow, cnum, 5, 1, name='pmconv1')
            x = gen_conv(x, cnum, 3, 2, name='pmconv2_downsample')
            x = gen_conv(x, 2*cnum, 3, 1, name='pmconv3')
            x = gen_conv(x, 4*cnum, 3, 2, name='pmconv4_downsample')
            x = gen_conv(x, 4*cnum, 3, 1, name='pmconv5')
            x = gen_conv(x, 4*cnum, 3, 1, name='pmconv6',
                         activation=tf.nn.relu)
            x, offset_flow = contextual_attention(x, x, mask_s, 3, 1, rate=2)
            x = gen_conv(x, 4*cnum, 3, 1, name='pmconv9')
            x = gen_conv(x, 4*cnum, 3, 1, name='pmconv10')
            pm = x
            x = tf.concat([x_hallu, pm], axis=3)

            x = gen_conv(x, 4*cnum, 3, 1, name='allconv11')
            x = gen_conv(x, 4*cnum, 3, 1, name='allconv12')
            x = gen_deconv(x, 2*cnum, name='allconv13_upsample')
            x = gen_conv(x, 2*cnum, 3, 1, name='allconv14')
            x = gen_deconv(x, cnum, name='allconv15_upsample')
            x = gen_conv(x, cnum//2, 3, 1, name='allconv16')
            x = gen_conv(x, 3, 3, 1, activation=None, name='allconv17')
            x = tf.nn.tanh(x)
            x_stage2 = x
        return x_stage1, x_stage2, offset_flow
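A hedged usage sketch for build_inpaint_net; it assumes `model` is an instance of the enclosing inpainting model class, and the 256x256 size and random mask are illustrative:

import tensorflow.compat.v1 as tf

image = tf.random.uniform([1, 256, 256, 3], minval=-1.0, maxval=1.0)   # in [-1, 1]
mask = tf.cast(tf.random.uniform([1, 256, 256, 1]) > 0.5, tf.float32)  # {0, 1}
incomplete = image * (1.0 - mask)
x_stage1, x_stage2, offset_flow = model.build_inpaint_net(
    incomplete, mask, training=False)
# Paste the stage-2 prediction back into the known region.
inpainted = x_stage2 * mask + incomplete * (1.0 - mask)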
fake_a = generator.generator(
    b_real, phase_in=phase,
    scope='b2a')  # generate fake-unfixed-BF using real-fixed-BF

fake_b_dis = discriminator.discriminator(fake_b, training=phase, scope='b')
fake_a_dis = discriminator.discriminator(fake_a, training=phase, scope='a')

rec_a = generator.generator(
    fake_b, phase_in=phase,
    scope='b2a')  # reconstructing unfixed-BF from fake-fixed-BF

rec_b = generator.generator(
    fake_a, phase_in=phase,
    scope='a2b')  # reconstructing fixed-BF from fake-unfixed-BF

gen_a2b_loss = tf.reduce_mean(
    tf.losses.mean_squared_error(fake_b_dis, tf.ones_like(fake_b_dis)))
gen_b2a_loss = tf.reduce_mean(
    tf.losses.mean_squared_error(fake_a_dis, tf.ones_like(fake_a_dis)))

# cycle_loss_unfixed = tf.reduce_mean(tf.abs(a_real - rec_a))
# cycle_loss_fixed = tf.reduce_mean(tf.abs(b_real - rec_b))

cycle_loss_unfixed = tf.reduce_mean(tf.losses.mean_squared_error(
    a_real, rec_a))
cycle_loss_fixed = tf.reduce_mean(tf.losses.mean_squared_error(b_real, rec_b))

# final generator loss
g_loss = (gen_a2b_loss + gen_b2a_loss) + 10 * (
    cycle_loss_unfixed + cycle_loss_fixed)  # FOR deepDeconv
# g_loss =  (gen_a2b_loss + gen_b2a_loss) + 10 * (cycle_loss_unfixed + cycle_loss_fixed)   # FOR das_despeckle
# g_loss =  (gen_a2b_loss + gen_b2a_loss) + 20 * (cycle_loss_unfixed + cycle_loss_fixed)   # FOR deconv_despeckle
Example #8
def retrieve(features, retriever_beam_size, mode, params):
    """Do retrieval."""
    tokenizer, vocab_lookup_table = bert_utils.get_tf_tokenizer(
        params["retriever_module_path"])

    question_token_ids = tokenizer.tokenize(
        tf.expand_dims(features["question"], 0))
    question_token_ids = tf.cast(
        question_token_ids.merge_dims(1, 2).to_tensor(), tf.int32)
    cls_token_id = vocab_lookup_table.lookup(tf.constant("[CLS]"))
    sep_token_id = vocab_lookup_table.lookup(tf.constant("[SEP]"))
    question_token_ids = tf.concat(
        [[[tf.cast(cls_token_id, tf.int32)]], question_token_ids,
         [[tf.cast(sep_token_id, tf.int32)]]], -1)

    retriever_module = hub.Module(
        params["retriever_module_path"],
        tags={"train"} if mode == tf_estimator.ModeKeys.TRAIN else {},
        trainable=True)

    # [1, projection_size]
    question_emb = retriever_module(
        inputs=dict(
            input_ids=question_token_ids,
            input_mask=tf.ones_like(question_token_ids),
            segment_ids=tf.zeros_like(question_token_ids)),
        signature="projected")

    block_emb, searcher = scann_utils.load_scann_searcher(
        var_name="block_emb",
        checkpoint_path=os.path.join(params["retriever_module_path"],
                                     "encoded", "encoded.ckpt"),
        num_neighbors=retriever_beam_size)

    # [1, retriever_beam_size]
    retrieved_block_ids, _ = searcher.search_batched(question_emb)

    # [1, retriever_beam_size, projection_size]
    retrieved_block_emb = tf.gather(block_emb, retrieved_block_ids)

    # [retriever_beam_size]
    retrieved_block_ids = tf.squeeze(retrieved_block_ids)

    # [retriever_beam_size, projection_size]
    retrieved_block_emb = tf.squeeze(retrieved_block_emb)

    # [1, retriever_beam_size]
    retrieved_logits = tf.matmul(question_emb,
                                 retrieved_block_emb,
                                 transpose_b=True)

    # [retriever_beam_size]
    retrieved_logits = tf.squeeze(retrieved_logits, 0)

    blocks_dataset = tf.data.TFRecordDataset(params["block_records_path"],
                                             buffer_size=512 * 1024 * 1024)
    blocks_dataset = blocks_dataset.batch(params["num_block_records"],
                                          drop_remainder=True)
    blocks = tf.get_local_variable(
        "blocks",
        initializer=tf.data.experimental.get_single_element(blocks_dataset))
    retrieved_blocks = tf.gather(blocks, retrieved_block_ids)
    return RetrieverOutputs(logits=retrieved_logits, blocks=retrieved_blocks)
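The retrieval logits above are inner products between the question embedding and the retrieved block embeddings; a minimal sketch with dummy shapes (projection_size=4, retriever_beam_size=3):

import tensorflow.compat.v1 as tf

question_emb = tf.constant([[1.0, 0.0, 2.0, 0.0]])        # [1, projection_size=4]
retrieved_block_emb = tf.random.uniform([3, 4])           # [retriever_beam_size=3, 4]
retrieved_logits = tf.squeeze(
    tf.matmul(question_emb, retrieved_block_emb, transpose_b=True), 0)  # [3]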
Example #9
def sinc(inputs):
    x = tf.where(
        tf.abs(inputs) < epsilon, epsilon * tf.ones_like(inputs), inputs)
    return tf.sin(x) / x
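A small usage sketch; `epsilon` is assumed to be a module-level constant, so the value below is illustrative. The clamp keeps sinc(0) finite instead of producing NaN:

import numpy as np

epsilon = 1e-10  # illustrative value for the assumed module-level constant
x = tf.constant([0.0, np.pi / 2.0, np.pi])
y = sinc(x)  # ~[1.0, 0.6366, 0.0]; no NaN at x == 0 thanks to the clamp above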
Example #10
def _compute_word_overlap(context_ids, context_len, question_ids, question_len,
                          reduce_type, weighted, vocab_df):
    """Compute word overlap between question and context ids.

  Args:
    context_ids: <int32> [batch_size, num_contexts, max_context_len]
    context_len: <int32> [batch_size, num_contexts]
    question_ids: <int32> [batch_size, max_question_len]
    question_len: <int32> [batch_size]
    reduce_type: String for reduce type when computing overlap. Choices are: max
      - Allows at most one match per question word. sum - Sums over all matches
      for each question word.
    weighted: Boolean indicating whether or not to weight the overlap by IDF.
    vocab_df: Tensor of shape [vocab_size] for word frequency. Computed at the
      document level from the current batch if not given.

  Returns:
    overlap: <float32> [batch_size, num_contexts]

  Raises:
    Exception: If invalid reduce_type is provided.
  """
    # <float> [batch_size, num_contexts, question_len, context_len]
    overlap = tf.to_float(
        _word_overlap_helper(question_ids=question_ids,
                             context_ids=context_ids))

    # <float> [batch_size, question_len]
    question_mask = tf.sequence_mask(question_len,
                                     tf.shape(question_ids)[1],
                                     dtype=tf.float32)

    # <float> [batch_size, num_contexts, context_len]
    context_mask = tf.sequence_mask(context_len,
                                    tf.shape(context_ids)[2],
                                    dtype=tf.float32)

    overlap *= tf.expand_dims(tf.expand_dims(question_mask, 1), -1)
    overlap *= tf.expand_dims(context_mask, 2)

    if weighted:
        if vocab_df is None:
            # Use document-level IDF computed with respect to the current batch.
            flat_context_ids = tf.to_int32(tf.reshape(context_ids, [-1]))

            # <float> [number of unique words]
            vocab_df = tf.bincount(flat_context_ids,
                                   minlength=tf.reduce_max(question_ids) + 1,
                                   dtype=tf.float32)

            # Replace all zeros with ones.
            vocab_df = tf.where(tf.equal(vocab_df, 0),
                                x=tf.ones_like(vocab_df),
                                y=vocab_df)

        # <float>[batch_size, question_len] expanded to
        # <float> [batch_size, 1, question_len, 1]
        question_df = tf.gather(vocab_df, question_ids)
        question_df = tf.expand_dims(tf.expand_dims(question_df, 1), -1)

        # <float> [batch_size, num_contexts, question_len, context_len]
        overlap = tf.divide(tf.to_float(overlap), question_df)

    if reduce_type == "max":
        # <float> [batch_size, num_contexts]
        overlap = tf.reduce_sum(tf.reduce_max(overlap, axis=[3]), axis=[2])
    elif reduce_type == "sum":
        # <float> [batch_size, num_contexts]
        overlap = tf.reduce_sum(overlap, axis=[2, 3])
    else:
        raise Exception("Reduce type %s is invalid." % reduce_type)

    return overlap
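A toy numpy illustration of the two reduce types described in the docstring, for a single (batch, context) pair with a [question_len=2, context_len=3] binary overlap matrix:

import numpy as np

overlap = np.array([[1., 0., 1.],    # question word 0 matches context words 0 and 2
                    [0., 0., 0.]])   # question word 1 matches nothing
max_overlap = np.sum(np.max(overlap, axis=-1))   # 1.0: at most one match per question word
sum_overlap = np.sum(overlap)                    # 2.0: every match counted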
Example #11
    def clip_boxes(self, boxes):
        """Clip boxes to fit in an image."""
        boxes = tf.where(tf.less(boxes, 0), tf.zeros_like(boxes), boxes)
        boxes = tf.where(tf.greater(boxes, self._output_size - 1),
                         (self._output_size - 1) * tf.ones_like(boxes), boxes)
        return boxes
Example #12
def make_global_local_transformer_side_inputs(
    long_paragraph_breakpoints: tf.Tensor,
    long_paragraph_ids: tf.Tensor,
    long_sentence_ids: tf.Tensor,
    global_paragraph_breakpoints: tf.Tensor,
    local_radius: int,
    relative_pos_max_distance: int,
    use_hard_g2l_mask: bool = False,
    ignore_hard_g2l_mask: tf.Tensor = None,
    use_hard_l2g_mask: bool = False,
    ignore_hard_l2g_mask: tf.Tensor = None,
    flat_sequence: bool = False,
    l2g_linked_ids: Optional[tf.Tensor] = None,
    name: Optional[Text] = None
) -> input_utils.GlobalLocalTransformerSideInputs:
    """Makes attention masks and relative ids for l2l, l2g, g2g, g2l for QA tasks.

  When `use_hard_g2l_mask=True` and `use_hard_l2g_mask=False`, the resulting
  attention pattern is similar to Figure 3b of the paper for representing
  a set of (unordered) contexts ("paragraphs" here), except instead of
  defining a new relative position label between a global paragraph token and
  its global sentence tokens, we just place each global paragraph token as
  the first token before subsequent global sentence tokens belonging to it.

  Note: This function assumes that we don't pack multiple examples into a single
  example, which is only done for pre-training.

  See `GlobalLocalTransformerLayers.call()` in `layers/transformer.py` for a
  description of the 8 side inputs.

  Args:
    long_paragraph_breakpoints: <int32>[batch_size, global_seq_len] Tensor of
      `0`s and `1`s indicating paragraph boundaries in the long input.
    long_paragraph_ids: <int32>[batch_size, long_seq_len] Tensor of ids
      indicating the paragraph each token belongs to.
    long_sentence_ids: <int32>[batch_size, long_seq_len] Tensor of ids
      indicating which sentence each token belongs to.
    global_paragraph_breakpoints: <int32>[batch_size, global_seq_len] Tensor of
      `0`s and `1`s indicating paragraph boundaries in the global input.
    local_radius: How many tokens to the left/right for input tokens to locally
      self-attend to. For example, a value of 1 would allow each token to only
      attend to 1 token to the left and 1 token to the right of it.
    relative_pos_max_distance: Maximum distance to use for relative position
      representations. All larger distances will be clipped to this value. Use 0
      to skip relative position representations entirely.
    use_hard_g2l_mask: If True, global tokens only attend to tokens of the
      corresponding sentences in the long input. If False, global tokens attend
      to all sentences within the corresponding global example.
    ignore_hard_g2l_mask: <int32>[batch_size, global_seq_len] Tensor of `0`s and
      `1`s indicating the indices in the global input which should ignore the
      `use_hard_g2l_mask`. `1` is for ignoring the hard mask and these tokens
      essentially attend to everything (except for padding tokens) in the long
      input. This can be useful to force some tokens (e.g, CLS) to attend to
      everything in the long input even though they don't necessarily map to
      anything in the long input via sentence / paragraph ids etc. This tensor
      will be applicable only when `use_hard_g2l_mask` is enabled.
    use_hard_l2g_mask: If True, long tokens only attend to tokens of the
      corresponding global tokens. If False, long tokens attend to all the
      global tokens within the corresponding global example.
    ignore_hard_l2g_mask: <int32>[batch_size, long_seq_len] Tensor of `0`s and
      `1`s indicating the indices in the long input which should ignore the
      `use_hard_l2g_mask`. `1` is for ignoring the hard mask and these tokens
      essentially attend to everything (except for padding tokens) in the global
      input. This can be useful to force some tokens (e.g, query tokens) to
      attend to everything in the global input even though they don't
      necessarily map to anything in the global input via sentence / paragraph
      ids etc. This tensor will be applicable only when `use_hard_l2g_mask` is
      enabled.
    flat_sequence: If True, the attention masks / relative attention ids would
      be computed assuming the default ETC setting where there isn't any
      structure (except for having the notion of a "sentence").
    l2g_linked_ids: <int32>[batch_size, long_seq_len] Tensor specifying the long
      tokens which should be linked to the global tokens. If the input is
      [[-1, -1, 0, 1, 1, -1]], then the 2nd long token would be linked to the
      0-th global token, and the 3rd and 4th long tokens would be linked to the
      1st global token.
    name: A name for the operation (optional).

  Returns:
    A `GlobalLocalTransformerSideInputs` with all relevant tensors set.
  """
    with tf.name_scope(name or 'make_global_local_transformer_side_inputs'):

        long_input_mask = tf.minimum(
            tf.cumsum(long_paragraph_breakpoints, axis=-1, reverse=True), 1)
        global_input_mask = tf.minimum(
            tf.cumsum(global_paragraph_breakpoints, axis=-1, reverse=True), 1)

        if flat_sequence:
            # Here we don't use any structure in the input, i.e. it falls back to
            # the default ETC setting where:
            # a) everything in the long input can attend to everything in the global
            #    input and vice-versa.
            # b) everything in the global input attends to everything in the global
            #    input.
            # c) everything in the long input can attend to everything in the long
            #    input that is within the local radius.
            #
            # Note that there is a small caveat here: the paragraph / CLS level tokens
            # in the global input would be orphaned (i.e. they wouldn't be linked to
            # anything in the long input), but that should probably be okay as they
            # still attend to everything in the global input.
            #
            # We don't have any packing here, so we need to construct long/global
            # breakpoints to indicate there's only one example. The structure of these
            # breakpoints should be as follows:
            # [0, 0, ....., 1, 0, 0, 0], i.e. there should be a single `1` just before
            # the padding begins; the rest of the tokens should be `0`.
            return (input_utils.
                    make_global_local_transformer_side_inputs_from_example_ids(
                        long_example_ids=long_input_mask,
                        global_example_ids=global_input_mask,
                        sentence_ids=long_sentence_ids,
                        local_radius=local_radius,
                        relative_pos_max_distance=relative_pos_max_distance,
                        use_hard_g2l_mask=use_hard_g2l_mask,
                        use_hard_l2g_mask=use_hard_l2g_mask))

        # Make paragraphs not attend to other paragraphs in the long input.
        long_paragraph_breakpoints = tf.convert_to_tensor(
            long_paragraph_breakpoints)
        long_paragraph_breakpoint_segments = tf.cumsum(
            long_paragraph_breakpoints, axis=-1, reverse=True)

        l2l_att_mask = feature_utils.make_local_segmented_att_mask(
            long_paragraph_breakpoint_segments, local_radius)

        global_paragraph_breakpoints = tf.convert_to_tensor(
            global_paragraph_breakpoints)
        global_paragraph_breakpoint_segments = tf.cumsum(
            global_paragraph_breakpoints, axis=-1, reverse=True)

        # For g2l, g2g and l2g, we can have everything attend everything else.
        # So we can have attention tokens as all `1`s and account for padding via
        # a mask.
        def _make_input_mask_from_breakpoints(
                breakpoint_segments: tf.Tensor) -> tf.Tensor:
            return tf.minimum(tf.cast(1, dtype=breakpoint_segments.dtype),
                              breakpoint_segments)

        long_attention_tokens = _make_input_mask_from_breakpoints(
            long_paragraph_breakpoint_segments)

        # Ignore the padding tokens.
        global_attention_tokens = _make_input_mask_from_breakpoints(
            global_paragraph_breakpoint_segments)

        g2g_att_mask = feature_utils.make_segmented_att_mask(
            global_attention_tokens)
        l2g_att_mask = tf.cast(
            tf.equal(long_attention_tokens[:, :, tf.newaxis],
                     global_attention_tokens[:, tf.newaxis, :]), tf.int32)
        g2l_att_mask = tf.transpose(l2g_att_mask, perm=[0, 2, 1])

        long_seq_len = long_paragraph_breakpoints.shape.as_list()[1]
        assert long_seq_len is not None

        global_seq_len = global_paragraph_breakpoints.shape.as_list()[1]
        assert global_seq_len is not None

        batch_size = tf.shape(long_paragraph_breakpoints)[0]
        assert batch_size is not None

        global_range = tf.range(global_seq_len, dtype=long_sentence_ids.dtype)
        long_ones = tf.ones_like(long_sentence_ids)
        global_ones = tf.ones_like(global_paragraph_breakpoints)

        if use_hard_g2l_mask:
            if ignore_hard_g2l_mask is None:
                ignore_hard_g2l_mask = tf.zeros_like(
                    global_paragraph_breakpoints)
            else:
                ignore_hard_g2l_mask = tf.convert_to_tensor(
                    ignore_hard_g2l_mask)

            # Have each global token attend to just one sentence instead of having
            # it attend to all the sentences within a global example.
            sentence_hard_g2l_att_mask = tf.equal(
                global_range[tf.newaxis, :, tf.newaxis],
                long_sentence_ids[:, tf.newaxis, :])

            # Also have paragraph global tokens attend to the corresponding long
            # paragraphs.
            paragraph_hard_g2l_att_mask = tf.equal(
                global_range[tf.newaxis, :, tf.newaxis],
                long_paragraph_ids[:, tf.newaxis, :])

            ignore_hard_g2l_att_mask = tf.equal(
                ignore_hard_g2l_mask[:, :, tf.newaxis],
                long_ones[:, tf.newaxis, :])

            # It's possible that certain global tokens, although linked to a long
            # sentence, might still be present in `ignore_hard_g2l_mask`. Such tokens
            # should also attend to everything in the long.
            hard_g2l_att_mask = tf.math.logical_or(
                tf.math.logical_or(sentence_hard_g2l_att_mask,
                                   paragraph_hard_g2l_att_mask),
                ignore_hard_g2l_att_mask)

            hard_g2l_att_mask = tf.cast(hard_g2l_att_mask, dtype=tf.int32)
            g2l_att_mask *= hard_g2l_att_mask

        if use_hard_l2g_mask:
            if ignore_hard_l2g_mask is None:
                ignore_hard_l2g_mask = tf.zeros_like(long_sentence_ids)
            else:
                ignore_hard_l2g_mask = tf.convert_to_tensor(
                    ignore_hard_l2g_mask)

            # Have each long token attend to just the corresponding global token
            # instead of having it attend to all the global tokens within a
            # global example.
            sentence_hard_l2g_att_mask = tf.equal(
                long_sentence_ids[:, :, tf.newaxis],
                global_range[tf.newaxis, tf.newaxis, :])

            # Also have paragraph global tokens attend to the corresponding long
            # paragraphs.
            paragraph_hard_l2g_att_mask = tf.equal(
                long_paragraph_ids[:, :, tf.newaxis],
                global_range[tf.newaxis, tf.newaxis, :])

            ignore_hard_l2g_att_mask = tf.equal(
                ignore_hard_l2g_mask[:, :, tf.newaxis],
                global_ones[:, tf.newaxis, :])

            # It's possible that certain long tokens, although linked to global
            # tokens, might still be present in `ignore_hard_l2g_mask`. Such tokens
            # should also attend to everything in the global.
            hard_l2g_att_mask = tf.math.logical_or(
                tf.math.logical_or(sentence_hard_l2g_att_mask,
                                   paragraph_hard_l2g_att_mask),
                ignore_hard_l2g_att_mask)

            hard_l2g_att_mask = tf.cast(hard_l2g_att_mask, dtype=tf.int32)
            l2g_att_mask *= hard_l2g_att_mask

        l2l_relative_att_ids = None
        g2g_relative_att_ids = None
        l2g_relative_att_ids = None
        g2l_relative_att_ids = None

        if relative_pos_max_distance > 0:

            relative_pos_generator = feature_utils.RelativePositionGenerator(
                relative_pos_max_distance)

            l2l_relative_att_ids = relative_pos_generator.make_local_relative_att_ids(
                seq_len=long_seq_len,
                local_radius=local_radius,
                batch_size=batch_size)

            sentence_l2g_relative_att_ids = tf.equal(
                long_sentence_ids[:, :, tf.newaxis],
                global_range[tf.newaxis, tf.newaxis, :])

            # Add relative att ids for global paragraph level tokens.
            paragraph_l2g_relative_att_ids = tf.equal(
                global_range[tf.newaxis, tf.newaxis, :],
                long_paragraph_ids[:, :, tf.newaxis])

            if l2g_linked_ids is None:
                l2g_linked_relative_att_ids = tf.zeros_like(
                    paragraph_l2g_relative_att_ids)
            else:
                l2g_linked_ids = tf.convert_to_tensor(l2g_linked_ids)
                l2g_linked_relative_att_ids = tf.equal(
                    global_range[tf.newaxis, tf.newaxis, :],
                    l2g_linked_ids[:, :, tf.newaxis])

            l2g_relative_att_ids = tf.cast(tf.math.logical_or(
                l2g_linked_relative_att_ids,
                tf.math.logical_or(sentence_l2g_relative_att_ids,
                                   paragraph_l2g_relative_att_ids)),
                                           dtype=tf.int32)

            g2l_relative_att_ids = tf.transpose(l2g_relative_att_ids,
                                                perm=[0, 2, 1])

            # For fused attention, l2l and l2g share the same relative vocabulary, as
            # do g2g and g2l, so we add an offset for l2g and g2l so their original
            # 0/1 ids don't collide with l2l and g2g relative position ids.
            l2g_relative_att_ids += relative_pos_generator.relative_vocab_size
            g2l_relative_att_ids += relative_pos_generator.relative_vocab_size

            g2g_relative_att_ids = relative_pos_generator.make_relative_att_ids(
                seq_len=global_seq_len, batch_size=batch_size)

            # We used up 2 ids to account for the collision in fused attention as
            # mentioned above. Hence the +2.
            g2g_max_rel_id = relative_pos_generator.relative_vocab_size + 2
            g2g_relative_att_ids = (
                feature_utils.overwrite_relative_att_ids_outside_segments(
                    rel_att_ids=g2g_relative_att_ids,
                    segment_ids=global_paragraph_breakpoint_segments,
                    overwrite_value=g2g_max_rel_id))

        return input_utils.GlobalLocalTransformerSideInputs(
            l2l_att_mask=l2l_att_mask,
            g2g_att_mask=g2g_att_mask,
            l2g_att_mask=l2g_att_mask,
            g2l_att_mask=g2l_att_mask,
            l2l_relative_att_ids=l2l_relative_att_ids,
            g2g_relative_att_ids=g2g_relative_att_ids,
            l2g_relative_att_ids=l2g_relative_att_ids,
            g2l_relative_att_ids=g2l_relative_att_ids)
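A hedged usage sketch with a tiny single-example batch: one paragraph token at global position 0 whose two sentences sit at global positions 1 and 2, followed by padding. It assumes the ETC feature_utils / input_utils helpers used above are importable, and the id values are illustrative only:

long_paragraph_breakpoints = tf.constant([[0, 0, 0, 1, 0, 0]])    # 4 long tokens + padding
long_paragraph_ids = tf.constant([[0, 0, 0, 0, 0, 0]])            # all map to global token 0
long_sentence_ids = tf.constant([[1, 1, 2, 2, 0, 0]])             # two sentences at global 1 and 2
global_paragraph_breakpoints = tf.constant([[0, 0, 1, 0]])        # 3 global tokens + padding
side_inputs = make_global_local_transformer_side_inputs(
    long_paragraph_breakpoints=long_paragraph_breakpoints,
    long_paragraph_ids=long_paragraph_ids,
    long_sentence_ids=long_sentence_ids,
    global_paragraph_breakpoints=global_paragraph_breakpoints,
    local_radius=2,
    relative_pos_max_distance=4)
# side_inputs.l2l_att_mask, side_inputs.g2l_att_mask, etc. are now populated.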
Example #13
def main(argv):
    if len(argv) > 1:
        raise app.UsageError('Too many command-line arguments.')

    # Parameters governing how the x coordinate of the spline will be laid out.
    # We will construct a spline with knots at
    #   [0 : 1 / x_scale : x_max],
    # by fitting it to values sampled at
    #   [0 : 1 / (x_scale * redundancy) : x_max]
    x_max = 12
    x_scale = 1024
    redundancy = 4  # Must be >= 2 for the spline to be useful.

    spline_spacing = 1. / (x_scale * redundancy)
    x_knots = np.arange(0,
                        x_max + spline_spacing,
                        spline_spacing,
                        dtype=np.float64)
    table = []
    with tf.Session() as sess:
        x_knot_ph = tf.placeholder(dtype=tf.float64, shape=())
        alpha_ph = distribution.inv_partition_spline_curve(x_knot_ph)
        partition_ph = numerical_base_partition_function(alpha_ph)
        # We iterate over knots, and for each knot recover the alpha value
        # corresponding to that knot with inv_partition_spline_curve(), and then
        # with that alpha we accurately approximate its partition function using
        # numerical_base_partition_function().
        for x_knot in x_knots:
            alpha, partition = sess.run((alpha_ph, partition_ph),
                                        {x_knot_ph: x_knot})
            table.append((x_knot, alpha, partition))
            print(table[-1])

    table = np.array(table)
    x = table[:, 0]
    alpha = table[:, 1]
    y_gt = np.log(table[:, 2])

    # We grab the values from the true log-partition table that correspond to
    # knots, by looking for where x * x_scale is an integer.
    mask = np.abs(np.round(x * x_scale) - (x * x_scale)) <= 1e-8
    values = y_gt[mask]

    # Initialize `tangents` using a central differencing scheme.
    values_pad = np.concatenate([[values[0] - values[1] + values[0]], values,
                                 [values[-1] - values[-2] + values[-1]]], 0)
    tangents = (values_pad[2:] - values_pad[:-2]) / 2.

    # Construct the spline's value and tangent TF variables, constraining the last
    # knot to have a fixed value Z(infinity) and a tangent of zero.
    n = len(values)
    tangents = tf.Variable(tangents, tf.float64)
    tangents = tf.where(
        np.arange(n) == (n - 1), tf.zeros_like(tangents), tangents)

    values = tf.Variable(values, tf.float64)
    values = tf.where(
        np.arange(n) == (n - 1),
        tf.ones_like(tangents) * 0.70526025442689566, values)

    # Interpolate into the spline.
    y = cubic_spline.interpolate1d(x * x_scale, values, tangents)

    # We minimize the maximum residual, which makes for a very ugly optimization
    # problem but appears to work in practice, and is what we most care about.
    loss = tf.reduce_max(tf.abs(y - y_gt))

    # Fit the spline.
    num_iters = 10001
    with tf.Session() as sess:
        global_step = tf.Variable(0, trainable=False)

        opt = tf.train.MomentumOptimizer(learning_rate=1e-9, momentum=0.99)
        step = opt.minimize(loss, global_step=global_step)
        sess.run(tf.global_variables_initializer())

        trace = []
        for ii in range(num_iters):
            _, i_loss, i_values, i_tangents, i_y = sess.run(
                [step, loss, values, tangents, y])
            trace.append(i_loss)
            if (ii % 200) == 0:
                print('%5d: %e' % (ii, i_loss))

    mask = alpha <= 4
    print('Max Error (a <= 4): %e' % np.max(np.abs(i_y[mask] - y_gt[mask])))
    print('Max Error: %e' % np.max(np.abs(i_y - y_gt)))

    # Save the spline to disk.
    np.savez('./data/partition_spline.npz',
             x_scale=x_scale,
             values=i_values,
             tangents=i_tangents)
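
A quick check of the central-differencing initialization above: padding `values` with linear extrapolations of the endpoints (2*v[0] - v[1] on the left, 2*v[-1] - v[-2] on the right) makes the central difference well defined at every knot and reduces the endpoint tangents to one-sided differences. A minimal NumPy sketch with made-up values:

import numpy as np

# Hypothetical knot values; in the script above these are log-partition samples.
values = np.array([0.0, 0.5, 0.9, 1.2], dtype=np.float64)

# Pad with linear extrapolations of the endpoints.
values_pad = np.concatenate(
    [[2 * values[0] - values[1]], values, [2 * values[-1] - values[-2]]])

# Central differences over the padded array give one tangent per knot.
tangents = (values_pad[2:] - values_pad[:-2]) / 2.0
print(tangents)  # [0.5, 0.45, 0.35, 0.3]; endpoints are one-sided differences.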
Example #14
    def call(self, x):
        input_image, y_pred, y_true, true_boxes = x

        # adjust the shape of y_pred to [batch, grid_h, grid_w, 3, 4+1+nb_class]
        y_pred = tf.reshape(
            y_pred,
            tf.concat([tf.shape(input=y_pred)[:3],
                       tf.constant([3, -1])],
                      axis=0))

        # initialize the masks
        object_mask = tf.expand_dims(y_true[..., 4], 4)

        # the variable to keep track of number of batches processed
        batch_seen = tf.Variable(0.)

        # compute grid factor and net factor
        grid_h = tf.shape(input=y_true)[1]
        grid_w = tf.shape(input=y_true)[2]
        grid_factor = tf.reshape(tf.cast([grid_w, grid_h], tf.float32),
                                 [1, 1, 1, 1, 2])

        net_h = tf.shape(input=input_image)[1]
        net_w = tf.shape(input=input_image)[2]
        net_factor = tf.reshape(tf.cast([net_w, net_h], tf.float32),
                                [1, 1, 1, 1, 2])
        """
        Adjust prediction
        """
        pred_box_xy = (self.cell_grid[:, :grid_h, :grid_w, :, :] +
                       tf.sigmoid(y_pred[..., :2]))  # sigma(t_xy) + c_xy
        pred_box_wh = y_pred[..., 2:4]  # t_wh
        pred_box_conf = tf.expand_dims(tf.sigmoid(y_pred[..., 4]),
                                       4)  # adjust confidence
        pred_box_class = y_pred[..., 5:]  # adjust class probabilities
        """
        Adjust ground truth
        """
        true_box_xy = y_true[..., 0:2]  # (sigma(t_xy) + c_xy)
        true_box_wh = y_true[..., 2:4]  # t_wh
        true_box_conf = tf.expand_dims(y_true[..., 4], 4)
        true_box_class = tf.argmax(input=y_true[..., 5:], axis=-1)
        """
        Compare each predicted box to all true boxes
        """
        # initially, drag all objectness of all boxes to 0
        conf_delta = pred_box_conf - 0

        # then, ignore the boxes which have good overlap with some true box
        true_xy = true_boxes[..., 0:2] / grid_factor
        true_wh = true_boxes[..., 2:4] / net_factor

        true_wh_half = true_wh / 2.
        true_mins = true_xy - true_wh_half
        true_maxes = true_xy + true_wh_half

        pred_xy = tf.expand_dims(pred_box_xy / grid_factor, 4)
        pred_wh = tf.expand_dims(
            tf.exp(pred_box_wh) * self.anchors / net_factor, 4)

        pred_wh_half = pred_wh / 2.
        pred_mins = pred_xy - pred_wh_half
        pred_maxes = pred_xy + pred_wh_half

        intersect_mins = tf.maximum(pred_mins, true_mins)
        intersect_maxes = tf.minimum(pred_maxes, true_maxes)

        intersect_wh = tf.maximum(intersect_maxes - intersect_mins, 0.)
        intersect_areas = intersect_wh[..., 0] * intersect_wh[..., 1]

        true_areas = true_wh[..., 0] * true_wh[..., 1]
        pred_areas = pred_wh[..., 0] * pred_wh[..., 1]

        union_areas = pred_areas + true_areas - intersect_areas
        iou_scores = tf.truediv(intersect_areas, union_areas)

        best_ious = tf.reduce_max(input_tensor=iou_scores, axis=4)
        conf_delta *= tf.expand_dims(
            tf.cast(best_ious < self.ignore_thresh, dtype=tf.float32), 4)
        """
        Compute some online statistics
        """
        true_xy = true_box_xy / grid_factor
        true_wh = tf.exp(true_box_wh) * self.anchors / net_factor

        true_wh_half = true_wh / 2.
        true_mins = true_xy - true_wh_half
        true_maxes = true_xy + true_wh_half

        pred_xy = pred_box_xy / grid_factor
        pred_wh = tf.exp(pred_box_wh) * self.anchors / net_factor

        pred_wh_half = pred_wh / 2.
        pred_mins = pred_xy - pred_wh_half
        pred_maxes = pred_xy + pred_wh_half

        intersect_mins = tf.maximum(pred_mins, true_mins)
        intersect_maxes = tf.minimum(pred_maxes, true_maxes)
        intersect_wh = tf.maximum(intersect_maxes - intersect_mins, 0.)
        intersect_areas = intersect_wh[..., 0] * intersect_wh[..., 1]

        true_areas = true_wh[..., 0] * true_wh[..., 1]
        pred_areas = pred_wh[..., 0] * pred_wh[..., 1]

        union_areas = pred_areas + true_areas - intersect_areas
        iou_scores = tf.truediv(intersect_areas, union_areas)
        iou_scores = object_mask * tf.expand_dims(iou_scores, 4)

        count = tf.reduce_sum(input_tensor=object_mask)
        count_noobj = tf.reduce_sum(input_tensor=1 - object_mask)
        detect_mask = tf.cast((pred_box_conf * object_mask) >= 0.5,
                              dtype=tf.float32)
        class_mask = tf.expand_dims(
            tf.cast(tf.equal(tf.argmax(input=pred_box_class, axis=-1),
                             true_box_class),
                    dtype=tf.float32), 4)
        recall50 = tf.reduce_sum(
            input_tensor=tf.cast(iou_scores >= 0.5, dtype=tf.float32) *
            detect_mask * class_mask) / (count + 1e-3)
        recall75 = tf.reduce_sum(
            input_tensor=tf.cast(iou_scores >= 0.75, dtype=tf.float32) *
            detect_mask * class_mask) / (count + 1e-3)
        avg_iou = tf.reduce_sum(input_tensor=iou_scores) / (count + 1e-3)
        avg_obj = tf.reduce_sum(input_tensor=pred_box_conf *
                                object_mask) / (count + 1e-3)
        avg_noobj = tf.reduce_sum(input_tensor=pred_box_conf *
                                  (1 - object_mask)) / (count_noobj + 1e-3)
        avg_cat = tf.reduce_sum(input_tensor=object_mask *
                                class_mask) / (count + 1e-3)
        """
        Warm-up training
        """
        batch_seen = tf.assign_add(batch_seen, 1.)

        true_box_xy, true_box_wh, xywh_mask = tf.cond(
            pred=tf.less(batch_seen, self.warmup_batches + 1),
            true_fn=lambda: [
                true_box_xy +
                (0.5 + self.cell_grid[:, :grid_h, :grid_w, :, :]) *
                (1 - object_mask), true_box_wh + tf.zeros_like(true_box_wh) *
                (1 - object_mask),
                tf.ones_like(object_mask)
            ],
            false_fn=lambda: [true_box_xy, true_box_wh, object_mask])
        """
        Compare each true box to all anchor boxes
        """
        wh_scale = tf.exp(true_box_wh) * self.anchors / net_factor
        wh_scale = tf.expand_dims(
            2 - wh_scale[..., 0] * wh_scale[..., 1],
            axis=4)  # the smaller the box, the bigger the scale

        xy_delta = xywh_mask * (pred_box_xy -
                                true_box_xy) * wh_scale * self.xywh_scale
        wh_delta = xywh_mask * (pred_box_wh -
                                true_box_wh) * wh_scale * self.xywh_scale
        conf_delta = object_mask * (
            pred_box_conf - true_box_conf) * self.obj_scale + (
                1 - object_mask) * conf_delta * self.noobj_scale
        class_delta = object_mask * \
                      tf.expand_dims(tf.nn.sparse_softmax_cross_entropy_with_logits(labels=true_box_class, logits=pred_box_class), 4) * \
                      self.class_scale

        loss_xy = tf.reduce_sum(input_tensor=tf.square(xy_delta),
                                axis=list(range(1, 5)))
        loss_wh = tf.reduce_sum(input_tensor=tf.square(wh_delta),
                                axis=list(range(1, 5)))
        loss_conf = tf.reduce_sum(input_tensor=tf.square(conf_delta),
                                  axis=list(range(1, 5)))
        loss_class = tf.reduce_sum(input_tensor=class_delta,
                                   axis=list(range(1, 5)))

        loss = loss_xy + loss_wh + loss_conf + loss_class

        loss = tf.Print(loss, [grid_h, avg_obj],
                        message='avg_obj \t\t',
                        summarize=1000)
        loss = tf.Print(loss, [grid_h, avg_noobj],
                        message='avg_noobj \t\t',
                        summarize=1000)
        loss = tf.Print(loss, [grid_h, avg_iou],
                        message='avg_iou \t\t',
                        summarize=1000)
        loss = tf.Print(loss, [grid_h, avg_cat],
                        message='avg_cat \t\t',
                        summarize=1000)
        loss = tf.Print(loss, [grid_h, recall50],
                        message='recall50 \t',
                        summarize=1000)
        loss = tf.Print(loss, [grid_h, recall75],
                        message='recall75 \t',
                        summarize=1000)
        loss = tf.Print(loss, [grid_h, count],
                        message='count \t',
                        summarize=1000)
        loss = tf.Print(loss, [
            grid_h,
            tf.reduce_sum(input_tensor=loss_xy),
            tf.reduce_sum(input_tensor=loss_wh),
            tf.reduce_sum(input_tensor=loss_conf),
            tf.reduce_sum(input_tensor=loss_class)
        ],
                        message='loss xy, wh, conf, class: \t',
                        summarize=1000)

        return loss * self.grid_scale
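
The ignore-mask computation above boils down to an IoU between boxes given as center coordinates and sizes: convert to min/max corners, intersect, and divide by the union. A minimal NumPy sketch of that IoU for a single pair of boxes (illustrative numbers, not the original layer):

import numpy as np

def iou_from_center_size(xy_a, wh_a, xy_b, wh_b):
    """IoU of two axis-aligned boxes given centers (x, y) and sizes (w, h)."""
    mins_a, maxes_a = xy_a - wh_a / 2., xy_a + wh_a / 2.
    mins_b, maxes_b = xy_b - wh_b / 2., xy_b + wh_b / 2.
    intersect_wh = np.maximum(
        np.minimum(maxes_a, maxes_b) - np.maximum(mins_a, mins_b), 0.)
    intersect_area = intersect_wh[0] * intersect_wh[1]
    union_area = wh_a[0] * wh_a[1] + wh_b[0] * wh_b[1] - intersect_area
    return intersect_area / union_area

print(iou_from_center_size(np.array([2., 2.]), np.array([2., 2.]),
                           np.array([3., 2.]), np.array([2., 2.])))  # ~0.333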
Example #15
def multilevel_roi_align(features, boxes, box_levels, output_size,
                         num_samples_per_cell_y=1, num_samples_per_cell_x=1,
                         align_corners=False, extrapolation_value=0.0,
                         scope=None):
  """Applies RoI Align op and returns feature for boxes.

  Given multiple features maps indexed by different levels, and a set of boxes
  where each box is mapped to a certain level, this function selectively crops
  and resizes boxes from the corresponding feature maps.

  We follow the RoI Align technique in https://arxiv.org/pdf/1703.06870.pdf
  figure 3. Specifically, each box is subdivided uniformly into a grid
  consisting of output_size[0] x output_size[1] rectangular cells. Within each
  cell we select `num_samples_per_cell_y * num_samples_per_cell_x` points uniformly
  and compute feature values using bilinear interpolation. Finally, we average
  pool the interpolated values in each cell to obtain a
  [output_size[0], output_size[1], channels] feature.

  If `align_corners` is true, sampling points are uniformly spread such that
  corner points exactly overlap corners of the boxes.

  In this function we also follow the convention of treating feature pixels as
  point objects with no spatial extent.

  Args:
    features: A list of 4D float tensors of shape [batch_size, max_height,
      max_width, channels] containing features. Note that each feature map must
      have the same number of channels.
    boxes: A 3D float tensor of shape [batch_size, num_boxes, 4] containing
      boxes of the form [ymin, xmin, ymax, xmax] in normalized coordinates.
    box_levels: A 3D int32 tensor of shape [batch_size, num_boxes]
      representing the feature level index for each box.
    output_size: A list of two integers [size_y, size_x] indicating the output
      feature size for each box.
    num_samples_per_cell_y: Number of grid points to sample along y axis in each
      cell.
    num_samples_per_cell_x: Number of grid points to sample along x axis in each
      cell.
    align_corners: Whether to align the corner grid points exactly with box
      corners.
    extrapolation_value: a float value to use for extrapolation.
    scope: Scope name to use for this op.

  Returns:
    A 5D float tensor of shape [batch_size, num_boxes, output_size[0],
    output_size[1], channels] representing the cropped features.
  """
  with tf.name_scope(scope, 'MultiLevelRoIAlign'):
    features, true_feature_shapes = pad_to_max_size(features)
    batch_size = tf.shape(features)[0]
    num_levels = features.get_shape().as_list()[1]
    max_feature_height = tf.shape(features)[2]
    max_feature_width = tf.shape(features)[3]
    num_filters = features.get_shape().as_list()[4]
    num_boxes = tf.shape(boxes)[1]

    # Convert boxes to absolute co-ordinates.
    true_feature_shapes = tf.cast(true_feature_shapes, dtype=boxes.dtype)
    true_feature_shapes = tf.gather(true_feature_shapes, box_levels)
    boxes *= tf.concat([true_feature_shapes - 1] * 2, axis=-1)

    size_y = output_size[0] * num_samples_per_cell_y
    size_x = output_size[1] * num_samples_per_cell_x
    box_grid_y, box_grid_x = box_grid_coordinate_vectors(
        boxes, size_y=size_y, size_x=size_x, align_corners=align_corners)
    (feature_grid_y0, feature_grid_x0, feature_grid_y1,
     feature_grid_x1) = feature_grid_coordinate_vectors(box_grid_y, box_grid_x)
    feature_grid_y = tf.reshape(
        tf.stack([feature_grid_y0, feature_grid_y1], axis=3),
        [batch_size, num_boxes, -1])
    feature_grid_x = tf.reshape(
        tf.stack([feature_grid_x0, feature_grid_x1], axis=3),
        [batch_size, num_boxes, -1])
    feature_coordinates = ravel_indices(feature_grid_y, feature_grid_x,
                                        num_levels, max_feature_height,
                                        max_feature_width, box_levels)
    valid_indices = _valid_indicator(feature_grid_y, feature_grid_x,
                                     true_feature_shapes)
    feature_coordinates = tf.where(valid_indices, feature_coordinates,
                                   -1 * tf.ones_like(feature_coordinates))
    flattened_features = tf.reshape(features, [-1, num_filters])
    flattened_feature_values = _gather_valid_indices(flattened_features,
                                                     feature_coordinates,
                                                     extrapolation_value)
    features_per_box = tf.reshape(
        flattened_feature_values,
        [batch_size, num_boxes, size_y * 2, size_x * 2, num_filters])

    # Cast tensors into dtype of features.
    box_grid_y = tf.cast(box_grid_y, dtype=features_per_box.dtype)
    box_grid_x = tf.cast(box_grid_x, dtype=features_per_box.dtype)
    feature_grid_y0 = tf.cast(feature_grid_y0, dtype=features_per_box.dtype)
    feature_grid_x0 = tf.cast(feature_grid_x0, dtype=features_per_box.dtype)

    # RoI Align operation is a bilinear interpolation of four
    # neighboring feature points f0, f1, f2, and f3 onto point y, x given by
    # f(y, x) = [hy, ly] * [[f00, f01], * [hx, lx]^T
    #                       [f10, f11]]
    #
    # Unrolling the matrix multiplies gives us:
    # f(y, x) = (hy * hx) f00 + (hy * lx) f01 + (ly * hx) f10 + (lx * ly) f11
    # f(y, x) = w00 * f00 + w01 * f01 + w10 * f10 + w11 * f11
    #
    # This can be computed by applying pointwise multiplication and sum_pool in
    # a 2x2 window.
    ly = box_grid_y - feature_grid_y0
    lx = box_grid_x - feature_grid_x0
    hy = 1.0 - ly
    hx = 1.0 - lx

    kernel_y = tf.reshape(
        tf.stack([hy, ly], axis=3), [batch_size, num_boxes, size_y * 2, 1])

    kernel_x = tf.reshape(
        tf.stack([hx, lx], axis=3), [batch_size, num_boxes, 1, size_x * 2])

    # Multiplier 4 is to make tf.nn.avg_pool behave like sum_pool.
    interpolation_kernel = kernel_y * kernel_x * 4

    # Interpolate the gathered features with computed interpolation kernels.
    features_per_box *= tf.expand_dims(interpolation_kernel, axis=4)
    features_per_box = tf.reshape(
        features_per_box,
        [batch_size * num_boxes, size_y * 2, size_x * 2, num_filters])

    # This combines the two pooling operations - sum_pool to perform bilinear
    # interpolation and avg_pool to pool the values in each bin.
    features_per_box = tf.nn.avg_pool(
        features_per_box,
        [1, num_samples_per_cell_y * 2, num_samples_per_cell_x * 2, 1],
        [1, num_samples_per_cell_y * 2, num_samples_per_cell_x * 2, 1], 'VALID')
    features_per_box = tf.reshape(
        features_per_box,
        [batch_size, num_boxes, output_size[0], output_size[1], num_filters])

    return features_per_box
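
The kernel built above encodes the standard bilinear weights from the comment: w00 = hy*hx, w01 = hy*lx, w10 = ly*hx, w11 = ly*lx. A tiny standalone sketch of that interpolation for one point (made-up corner values):

import numpy as np

def bilinear(f00, f01, f10, f11, ly, lx):
    """Bilinear interpolation at fractional offsets (ly, lx) from the top-left point."""
    hy, hx = 1.0 - ly, 1.0 - lx
    return hy * hx * f00 + hy * lx * f01 + ly * hx * f10 + ly * lx * f11

# Interpolating halfway between the four corner values gives their average.
print(bilinear(1.0, 2.0, 3.0, 4.0, ly=0.5, lx=0.5))  # 2.5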
Example #16
def get_stage_1_loss(pred_labels_key_p, pred_labels_direction,
                     pred_regression_direction, pred_regression_position,
                     pred_labels_type, labels_key_p, labels_direction,
                     regression_direction, regression_position, labels_type,
                     simmat_pl, neg_simmat_pl, pred_simmat, pred_conf_logits):
    batch_size = pred_labels_key_p.get_shape()[0].value
    num_point = pred_labels_key_p.get_shape()[1].value
    mask = tf.cast(labels_key_p, tf.float32)
    neg_mask = tf.ones_like(mask) - mask
    Np = tf.expand_dims(tf.reduce_sum(mask, axis=1), 1)
    Ng = tf.expand_dims(tf.reduce_sum(neg_mask, axis=1), 1)
    all_mask = tf.ones_like(mask)
    #loss:task1
    task_1_loss = tf.reduce_mean(
        tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=pred_labels_key_p, labels=labels_key_p) * (mask *
                                                              (Ng / Np) + 1))
    task_1_recall = tf.reduce_mean(tf.reduce_sum(tf.cast(tf.equal(tf.argmax(pred_labels_key_p,axis=2,output_type = tf.int32),\
                          labels_key_p),tf.float32)*mask,axis = 1)/tf.reduce_sum(mask,axis=1))
    task_1_acc = tf.reduce_mean(tf.reduce_sum(tf.cast(tf.equal(tf.argmax(pred_labels_key_p,axis=2,output_type = tf.int32),\
                          labels_key_p),tf.float32),axis = 1)/num_point)
    #loss:task2_1
    task_2_1_loss =  tf.reduce_mean(tf.reduce_sum(tf.nn.sparse_softmax_cross_entropy_with_logits(logits = pred_labels_direction,\
                               labels = labels_direction)*mask,axis = 1)/tf.reduce_sum(mask,axis=1))
    task_2_1_acc = tf.reduce_mean(tf.reduce_sum(tf.cast(tf.equal(tf.argmax(pred_labels_direction,axis=2,output_type=tf.int32), \
                               labels_direction),tf.float32)*mask,axis=1)/tf.reduce_sum(mask,axis=1))
    #loss:task2_2
    task_2_2_loss = tf.reduce_mean(tf.reduce_sum(tf.reduce_mean(smooth_l1_dist(pred_regression_direction-regression_direction),axis=2)*mask, \
                               axis = 1)/tf.reduce_sum(mask,axis=1))
    #loss:task3
    task_3_loss = tf.reduce_mean(tf.reduce_sum(tf.reduce_mean(smooth_l1_dist(pred_regression_position-regression_position),axis=2)*mask, \
                               axis = 1)/tf.reduce_sum(mask,axis=1))
    #loss:task4
    task_4_loss = tf.reduce_mean(
        tf.reduce_sum(tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=pred_labels_type, labels=labels_type) * mask,
                      axis=1) / tf.reduce_sum(mask, axis=1))
    task_4_acc = tf.reduce_mean(tf.reduce_sum(tf.cast(tf.equal(tf.argmax(pred_labels_type,axis=2,output_type = tf.int32),\
                          labels_type),tf.float32)*mask,axis = 1)/tf.reduce_sum(mask,axis=1))

    #loss: task_5
    pos = pred_simmat * simmat_pl
    neg = tf.maximum(80 - pred_simmat, 0) * neg_simmat_pl
    task_5_loss = tf.reduce_mean(pos + neg)
    #loss: task_6
    ng_label = tf.greater(simmat_pl, 0.5)
    ng = tf.less(pred_simmat, 80)
    epsilon = tf.constant(
        np.ones(ng_label.get_shape()[:2]).astype(np.float32) * 1e-6)
    pts_iou = tf.reduce_sum(tf.cast(tf.logical_and(ng, ng_label), tf.float32), axis=2) / \
                      (tf.reduce_sum(tf.cast(tf.logical_or(ng, ng_label), tf.float32), axis=2) + epsilon)
    task_6_loss = tf.reduce_mean(
        tf.squared_difference(pts_iou, tf.squeeze(pred_conf_logits, [2])))
    w1 = 1
    w2_1 = 1
    w2_2 = 100
    w3 = 100
    w4 = 1
    w5 = 1
    w6 = 100

    loss = task_1_loss * w1 + task_2_1_loss * w2_1 + task_2_2_loss * w2_2 + task_3_loss * w3 + task_4_loss * w4 + task_5_loss * w5 + task_6_loss * w6

    tf.summary.scalar('all loss', loss)
    tf.add_to_collection('losses', loss)
    return task_1_loss, task_1_recall, task_1_acc, task_2_1_loss, task_2_1_acc, task_2_2_loss, task_3_loss, task_4_loss, task_4_acc, task_5_loss, task_6_loss, loss
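
`smooth_l1_dist` is defined elsewhere in this codebase. For reference, a common smooth-L1 (Huber-style) distance looks like the sketch below; this is an assumption about its typical form, not the project's actual implementation:

import tensorflow as tf

def smooth_l1_dist(deltas, delta=1.0):
    """One common smooth-L1 distance: quadratic near zero, linear beyond `delta`."""
    abs_deltas = tf.abs(deltas)
    return tf.where(abs_deltas < delta,
                    0.5 * tf.square(deltas),
                    delta * (abs_deltas - 0.5 * delta))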
Example #17
def losses(generator_fn, discriminator_fn, real_data, z, disc_params, flags):
    """Returns loss variables for the generator and discriminator."""
    fake_data = generator_fn(z)

    if flags.acts_loss > 0.:
        disc_real, disc_real_acts = discriminator_fn(real_data,
                                                     return_acts=True)
        disc_fake, disc_fake_acts = discriminator_fn(fake_data,
                                                     return_acts=True)
    else:
        disc_real = discriminator_fn(real_data)
        disc_fake = discriminator_fn(fake_data)

    acts_l2_loss = 0.
    acts_count = 1.
    if flags.acts_loss > 0.:
        all_disc_acts = disc_real_acts + disc_fake_acts
        for act in all_disc_acts:
            acts_l2_loss += tf.nn.l2_loss(act)
            acts_count += tf.reduce_sum(tf.ones_like(act))

    l2_reg_d_cost = 0.
    if flags.l2_reg_d > 0:
        for p in disc_params:
            if 'weights' in p.name:
                l2_reg_d_cost += tf.nn.l2_loss(p)
        l2_reg_d_cost *= flags.l2_reg_d

    def cn(x):
        """Compressive nonlinearity (used by the wgan-gp branch below)."""
        return tf.asinh(4. * x) / 4.

    if flags.algorithm == 'vanilla':
        gen_cost = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(
                logits=disc_fake, labels=tf.ones_like(disc_fake)))
        disc_cost = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(
                logits=disc_fake, labels=tf.zeros_like(disc_fake)))
        disc_cost += tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(
                logits=disc_real, labels=tf.ones_like(disc_real)))
        divergence = gen_cost
        disc_cost += l2_reg_d_cost
        disc_cost += flags.acts_loss * (acts_l2_loss / (1e-2 + acts_count))

    elif flags.algorithm == 'vanilla_minimax':
        disc_cost = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(
                logits=disc_fake, labels=tf.zeros_like(disc_fake)))
        disc_cost += tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(
                logits=disc_real, labels=tf.ones_like(disc_real)))
        gen_cost = -disc_cost
        divergence = ((-disc_cost) + tf.log(4.)) / 2.
        disc_cost += l2_reg_d_cost
        disc_cost += flags.acts_loss * (acts_l2_loss / (1e-2 + acts_count))

    elif flags.algorithm == 'wgan-gp':
        input_ndim = len(real_data.get_shape())
        if flags.wgangp_compressive_loss:
            disc_fake = cn(disc_fake)
            disc_real = cn(disc_real)
        wgan_disc_cost = tf.reduce_mean(disc_fake) - tf.reduce_mean(disc_real)
        alpha = tf.random_uniform(shape=[tf.shape(real_data)[0]] +
                                  [1 for i in range(input_ndim - 1)],
                                  minval=0.,
                                  maxval=1.)
        differences = fake_data - real_data
        interpolates = real_data + (alpha * differences)
        if flags.acts_loss > 0.:
            disc_interps, disc_interp_acts = discriminator_fn(interpolates,
                                                              return_acts=True)
        else:
            disc_interps = discriminator_fn(interpolates)
        gradients = tf.gradients(disc_interps, [interpolates])[0]
        slopes = tf.sqrt(
            1e-8 +
            tf.reduce_sum(tf.square(gradients),
                          reduction_indices=[i for i in range(1, input_ndim)]))
        gradient_penalty = tf.reduce_mean((slopes - 1.)**2)
        disc_cost = wgan_disc_cost + (flags.wgangp_lambda * gradient_penalty)
        disc_cost += l2_reg_d_cost

        if flags.acts_loss > 0.:
            for act in disc_interp_acts:
                acts_l2_loss += flags.acts_loss * tf.nn.l2_loss(act)
                acts_count += tf.reduce_sum(tf.ones_like(act))
        disc_cost += flags.acts_loss * (acts_l2_loss / (1e-2 + acts_count))

        if flags.wgangp_minimax:
            gen_cost = -disc_cost
            divergence = -disc_cost
        else:
            gen_cost = -tf.reduce_mean(disc_fake)
            divergence = -wgan_disc_cost

    elif flags.algorithm == 'r1':
        disc_cost = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(
                logits=disc_fake, labels=tf.zeros_like(disc_fake)))
        disc_cost += tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(
                logits=disc_real, labels=tf.ones_like(disc_real)))
        gen_cost = -disc_cost
        divergence = ((-disc_cost) + tf.log(4.)) / 2.

        input_ndim = len(real_data.get_shape())
        gradients = tf.gradients(tf.nn.sigmoid(disc_real), [real_data])[0]
        slopes = tf.sqrt(
            1e-8 +
            tf.reduce_sum(tf.square(gradients),
                          reduction_indices=[i for i in range(1, input_ndim)]))
        gradient_penalty = 0.5 * tf.reduce_mean(slopes**2)

        disc_cost += flags.wgangp_lambda * gradient_penalty
        disc_cost += l2_reg_d_cost
        disc_cost += flags.acts_loss * (acts_l2_loss / (1e-2 + acts_count))

    elif flags.algorithm == 'r1-ns':
        disc_cost = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(
                logits=disc_fake, labels=tf.zeros_like(disc_fake)))
        disc_cost += tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(
                logits=disc_real, labels=tf.ones_like(disc_real)))
        divergence = ((-disc_cost) + tf.log(4.)) / 2.
        gen_cost = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(
                logits=disc_fake, labels=tf.ones_like(disc_fake)))

        input_ndim = len(real_data.get_shape())
        gradients = tf.gradients(tf.nn.sigmoid(disc_real), [real_data])[0]
        slopes = tf.sqrt(
            1e-8 +
            tf.reduce_sum(tf.square(gradients),
                          reduction_indices=[i for i in range(1, input_ndim)]))
        gradient_penalty = 0.5 * tf.reduce_mean(slopes**2)

        disc_cost += flags.wgangp_lambda * gradient_penalty
        disc_cost += l2_reg_d_cost
        disc_cost += flags.acts_loss * (acts_l2_loss / (1e-2 + acts_count))

    return gen_cost, disc_cost, divergence
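
In the 'wgan-gp' branch, the gradient penalty is evaluated at random points on the line between each real sample and its corresponding fake sample, with one `alpha` per example broadcast over all remaining dimensions. A minimal NumPy sketch of just that interpolation step (illustrative shapes and values):

import numpy as np

real_data = np.ones((4, 8), dtype=np.float32)   # stand-in batch of real samples
fake_data = np.zeros((4, 8), dtype=np.float32)  # stand-in generator output

# One alpha per sample, broadcast over the feature dimensions.
alpha = np.random.uniform(0., 1., size=(real_data.shape[0], 1)).astype(np.float32)
interpolates = real_data + alpha * (fake_data - real_data)

# Each row lies on the straight line between its real and fake sample.
print(interpolates.shape)  # (4, 8)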
Example #18
def build_distractors(distractor_examples, context):
    """Create inputs with distractors."""

    CLS_ID = tf.constant([101], dtype=tf.int64)  # pylint: disable=invalid-name
    SEP_ID = tf.constant([102], dtype=tf.int64)  # pylint: disable=invalid-name

    bert_inputs = []
    input_masks = []
    segment_ids = []
    # for each distractor
    sample_size = int((FLAGS.num_choices - 4) / (FLAGS.data_window_size - 1))
    for example in distractor_examples:
        # randomly sample 7
        intermediate_examples_tensor = tf.reduce_sum(tf.abs(example), 1)
        examples_zero_vector = tf.zeros(shape=(1, 1), dtype=tf.int64)
        examples_bool_mask = tf.squeeze(
            tf.not_equal(intermediate_examples_tensor, examples_zero_vector))
        paragraph_len = tf.reduce_sum(tf.cast(examples_bool_mask, tf.int32))
        indices = tf.range(0, limit=paragraph_len, dtype=tf.int32)
        shuffled_indices = tf.random.shuffle(indices)[:sample_size]

        # extend examples / targets
        distractor_cand = example
        distractor_cand_plus_one = distractor_cand[1:]
        distractor_cand_plus_two = distractor_cand[2:]

        # pad extensions
        paddings_one = tf.constant([[0, 1], [0, 0]])
        distractor_cand_plus_one = tf.pad(distractor_cand_plus_one,
                                          paddings_one)

        paddings_two = tf.constant([[0, 2], [0, 0]])
        distractor_cand_plus_two = tf.pad(distractor_cand_plus_two,
                                          paddings_two)

        distractor_cand_ext = tf.concat([
            distractor_cand, distractor_cand_plus_one, distractor_cand_plus_two
        ],
                                        axis=1)

        distractors = tf.gather(distractor_cand_ext, shuffled_indices)
        for i in range(sample_size):
            distractors_non_zero = tf.where(
                tf.not_equal(distractors[i], tf.zeros_like(distractors[i])))
            distractors_stripped = tf.gather_nd(distractors[i],
                                                distractors_non_zero)
            segment_id = tf.concat([
                tf.zeros_like(CLS_ID, dtype=tf.int64),
                tf.zeros_like(context),
                tf.zeros_like(SEP_ID, dtype=tf.int64),
                tf.ones_like(distractors_stripped),
                tf.ones_like(SEP_ID, dtype=tf.int64)
            ],
                                   axis=0)
            segment_id = pad_and_cut(segment_id, FLAGS.max_seq_length)
            segment_ids.append(segment_id)
            new_input = tf.concat(
                [CLS_ID, context, SEP_ID, distractors_stripped, SEP_ID],
                axis=0)

            input_mask = tf.ones_like(new_input)
            input_mask = pad_and_cut(input_mask, FLAGS.max_seq_length)
            input_masks.append(input_mask)
            padded_new_input = pad_and_cut(new_input, FLAGS.max_seq_length)
            bert_inputs.append(padded_new_input)

    bert_inputs = tf.stack(bert_inputs, axis=0)
    input_masks = tf.stack(input_masks, axis=0)
    segment_ids = tf.stack(segment_ids, axis=0)
    out = Outputs_And_Context(bert_inputs, input_masks, segment_ids, None,
                              None)
    return out
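
`pad_and_cut` is defined elsewhere in this file. A plausible stand-in, assumed for illustration only, would right-pad a 1-D id tensor with zeros and then truncate it to a fixed length:

import tensorflow as tf

def pad_and_cut(tensor, length):
    """Assumed helper (not the original): zero-pad a 1-D tensor to `length`, then cut."""
    tensor = tf.pad(tensor, [[0, tf.maximum(length - tf.shape(tensor)[0], 0)]])
    return tensor[:length]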
Example #19
        def model_fn(features, labels, mode):
            """AdversarialReweightingModel model_fn.

      Args:
        features: `Tensor` or `dict` of `Tensor`.
        labels: A `dict` of `Tensor` Objects. Expects to have a key/value pair
          for the key self.label_column_name.
        mode: Defines whether this is training, evaluation or prediction. See
          `ModeKeys`. Currently PREDICT mode is not implemented.

      Returns:
        An instance of `tf.estimator.EstimatorSpec`, which encapsulates the
        `mode`, `predictions`, `loss` and the `train_op`. Note that here
        `predictions` is either a `Tensor` or a `dict` of `Tensor` objects,
        representing the prediction of the binary classification model.
        `loss` is a scalar containing the loss of the step and `train_op` is the
        op for training.
      """

            # Instantiates a tensor with weight for positive class examples only
            pos_weights = tf.cast(tf.equal(labels[self._label_column_name], 1),
                                  dtype=tf.float32)

            # Instantiates a tensor with true class labels
            class_labels = labels[self._label_column_name]

            # Initialize a global step variable used for alternate training
            current_step = self._get_or_create_global_step_var()

            if mode == tf.estimator.ModeKeys.EVAL:
                tf.logging.info('model_fn: EVAL, {}'.format(mode))
            elif mode == tf.estimator.ModeKeys.TRAIN:
                tf.logging.info('model_fn: TRAIN, {}'.format(mode))

            # Creates a DNN architecture for primary binary classification task
            with tf.name_scope('primary_NN'):
                with tf.variable_scope('primary'):
                    input_layer = tf.feature_column.input_layer(
                        features, self._feature_columns)
                    h1 = tf.layers.Dense(
                        self._primary_hidden_units[0],
                        activation=self._activation)(input_layer)
                    h2 = tf.layers.Dense(self._primary_hidden_units[1],
                                         activation=self._activation)(h1)
                    logits = tf.layers.Dense(1)(h2)
                    sigmoid_output = tf.nn.sigmoid(logits, name='sigmoid')
                    class_predictions = tf.cast(
                        tf.greater(sigmoid_output, 0.5), tf.float32)
                    tf.summary.histogram('class_predictions',
                                         class_predictions)

            # Creates a network architecture for the adversarial regression task
            with tf.name_scope('adversary_NN'):
                with tf.variable_scope('adversary'):
                    # Gets adversary features and features columns
                    adversarial_features, adversary_feature_columns = self._get_adversary_features_and_feature_columns(features, labels)  # pylint: disable=line-too-long
                    adv_input_layer = tf.feature_column.input_layer(
                        adversarial_features, adversary_feature_columns)
                    adv_h1 = tf.layers.Dense(
                        self._adversary_hidden_units[0])(adv_input_layer)
                    adv_output_layer = tf.layers.Dense(1,
                                                       use_bias=True)(adv_h1)
                    example_weights = tf.cond(
                        tf.greater(current_step, self._pretrain_steps),
                        true_fn=lambda: self._compute_example_weights(
                            adv_output_layer),
                        false_fn=lambda: tf.ones_like(class_labels))

            # Adds summary variables to tensorboard
            with tf.name_scope('example_weights'):
                tf.summary.histogram('example_weights', example_weights)
                tf.summary.histogram('label', class_labels)

            # Initializes Loss Functions
            primary_loss = self._primary_loss(class_labels, logits,
                                              example_weights)
            adversary_loss = self._adversary_loss(class_labels, logits,
                                                  pos_weights, example_weights,
                                                  self._adversary_loss_type)

            # Sets up dictionaries used for computing performance metrics
            predictions = {
                (self._label_column_name, 'class_ids'):
                tf.reshape(class_predictions, [-1]),
                (self._label_column_name, 'logistic'):
                tf.reshape(sigmoid_output, [-1]),
                ('example_weights'):
                tf.reshape(example_weights, [-1])
            }

            class_id_kwargs = {
                'labels': class_labels,
                'predictions': class_predictions
            }
            logistics_kwargs = {
                'labels': class_labels,
                'predictions': sigmoid_output
            }

            # EVAL Mode
            if mode == tf.estimator.ModeKeys.EVAL:
                with tf.name_scope('eval_metrics'):
                    eval_metric_ops = {
                        'accuracy':
                        tf.metrics.accuracy(**class_id_kwargs),
                        'precision':
                        tf.metrics.precision(**class_id_kwargs),
                        'recall':
                        tf.metrics.recall(**class_id_kwargs),
                        'fp':
                        tf.metrics.false_positives(**class_id_kwargs),
                        'fn':
                        tf.metrics.false_negatives(**class_id_kwargs),
                        'tp':
                        tf.metrics.true_positives(**class_id_kwargs),
                        'tn':
                        tf.metrics.true_negatives(**class_id_kwargs),
                        'fpr':
                        contrib_metrics.streaming_false_positive_rate(
                            **class_id_kwargs),  # pylint: disable=line-too-long
                        'fnr':
                        contrib_metrics.streaming_false_negative_rate(
                            **class_id_kwargs),  # pylint: disable=line-too-long
                        'auc':
                        tf.metrics.auc(curve='ROC', **logistics_kwargs),
                        'aucpr':
                        tf.metrics.auc(curve='PR', **logistics_kwargs)
                    }

                    # EstimatorSpec object for evaluation
                    estimator_spec = tf.estimator.EstimatorSpec(
                        mode=mode,
                        predictions=predictions,
                        loss=primary_loss,
                        eval_metric_ops=eval_metric_ops)

            # TRAIN Mode
            if mode == tf.estimator.ModeKeys.TRAIN:
                # Filters trainable variables for each task
                all_trainable_vars = tf.trainable_variables()
                primary_trainable_vars = [
                    v for v in all_trainable_vars if 'primary' in v.op.name
                ]
                adversary_trainable_vars = [
                    v for v in all_trainable_vars if 'adversary' in v.op.name
                ]

                # TRAIN_OP for adversary DNN
                train_op_adversary = contrib_layers.optimize_loss(
                    loss=adversary_loss,
                    variables=adversary_trainable_vars,
                    global_step=contrib_framework.get_global_step(),
                    learning_rate=self._adversary_learning_rate,
                    optimizer=self._optimizer)

                # TRAIN_OP for primary DNN
                train_op_primary = contrib_layers.optimize_loss(
                    loss=primary_loss,
                    variables=primary_trainable_vars,
                    global_step=contrib_framework.get_global_step(),
                    learning_rate=self._primary_learning_rate,
                    optimizer=self._optimizer)

                # Up to `pretrain_steps`, trains the primary model only.
                # Beyond `pretrain_steps`, alternates between primary and adversary.
                estimator_spec = tf.estimator.EstimatorSpec(
                    mode=mode,
                    predictions=predictions,
                    loss=primary_loss + adversary_loss,
                    train_op=tf.cond(
                        tf.greater(current_step, self._pretrain_steps),
                        true_fn=lambda: tf.group(
                            [train_op_primary, train_op_adversary]),  # pylint: disable=line-too-long
                        false_fn=lambda: tf.group([train_op_primary])))

            return estimator_spec
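
The train op above only switches the adversary on after `pretrain_steps` optimization steps. A stripped-down sketch of that `tf.cond` gating pattern with placeholder ops (names and the threshold are illustrative, not from the original model):

import tensorflow as tf

global_step = tf.train.get_or_create_global_step()
pretrain_steps = 250  # illustrative threshold

# Stand-in train ops for the primary and adversary optimizers.
train_op_primary = tf.no_op(name='train_primary')
train_op_adversary = tf.no_op(name='train_adversary')

# Train only the primary network until `pretrain_steps`, then both together.
train_op = tf.cond(
    tf.greater(global_step, pretrain_steps),
    true_fn=lambda: tf.group(train_op_primary, train_op_adversary),
    false_fn=lambda: tf.group(train_op_primary))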
Example #20
def build_bert_inputs(example):
    """Convert example <Tensor [30, 70]> into bert inputs."""

    CLS_ID = tf.constant([101], dtype=tf.int64)  # pylint: disable=invalid-name
    SEP_ID = tf.constant([102], dtype=tf.int64)  # pylint: disable=invalid-name
    max_len = tf.constant([FLAGS.max_para_length])
    context_size = tf.constant([FLAGS.context_size])

    intermediate_examples_tensor = tf.reduce_sum(tf.abs(example), 1)
    examples_zero_vector = tf.zeros(shape=(1, 1), dtype=tf.int64)
    examples_bool_mask = tf.squeeze(
        tf.not_equal(intermediate_examples_tensor, examples_zero_vector))
    paragraph_len = tf.reduce_sum(tf.cast(examples_bool_mask, tf.int32))

    start = tf.random.uniform([1],
                              0,
                              tf.reshape(paragraph_len, []) -
                              tf.reshape(context_size, []) + 1,
                              dtype=tf.int32)

    # Slice the document into the before, after and context.
    # Discard the zero padding.
    sizes = tf.squeeze(
        tf.concat([[
            start, context_size, paragraph_len - context_size - start,
            max_len - paragraph_len
        ]], 0))
    before, context, after, _ = tf.split(example, sizes, axis=0)

    # Gather the context removing zero padding at end of sentences.
    non_zeros = tf.where(tf.not_equal(context, tf.zeros_like(context)))
    context_gathered = tf.gather_nd(context, non_zeros)

    # Flip before so we select the 4 sentences closest to target
    before = tf.reverse(before, axis=[0])

    # pad both to longer than needed
    paddings = tf.constant([[0, 8], [0, 0]])
    before = tf.pad(before, paddings)
    after = tf.pad(after, paddings)

    # Extend targets to 3 sentences
    # pad both
    before_minus_one = before[1:][:4]
    before_minus_two = before[2:][:4]
    after_plus_one = after[1:][:4]
    after_plus_two = after[2:][:4]
    before = before[:4]
    after = after[:4]

    before = tf.concat([before_minus_two, before_minus_one, before], axis=1)
    after = tf.concat([after, after_plus_one, after_plus_two], axis=1)
    ############################################################################

    # before = before[:4]
    # after = after[:4]

    # These 8 sentences are the 8 surrounding targets. Some are padding.
    targets = tf.concat([before, after], axis=0)

    # Remove the padding from the surrounding sentences
    # Eg. if context starts at beginning of paragraph, before is all padding
    intermediate_tensor = tf.reduce_sum(tf.abs(targets), 1)
    zero_vector = tf.zeros(shape=(1, 1), dtype=tf.int64)
    bool_mask = tf.squeeze(tf.not_equal(intermediate_tensor, zero_vector))
    bool_mask.set_shape([None])
    targets = tf.boolean_mask(targets, bool_mask)

    # Randomly select 4 targets
    # We will also select the label_types for each selected target
    indices = tf.range(0, limit=tf.shape(targets)[0], dtype=tf.int32)
    shuffled_indices = tf.random.shuffle(indices)[:4]

    targets = tf.gather(targets, shuffled_indices)
    full_labels = tf.concat([tf.range(3, -1, -1), tf.range(4, 8)], axis=0)
    label_types = tf.boolean_mask(full_labels, bool_mask)
    label_types = tf.gather(label_types, shuffled_indices)

    # create inputs
    bert_inputs = []
    input_masks = []
    segment_ids = []
    for i in range(4):
        target_non_zero = tf.where(
            tf.not_equal(targets[i], tf.zeros_like(targets[i])))
        targets_stripped = tf.gather_nd(targets[i], target_non_zero)
        segment_id = tf.concat([
            tf.zeros_like(CLS_ID, dtype=tf.int64),
            tf.zeros_like(context_gathered),
            tf.zeros_like(SEP_ID, dtype=tf.int64),
            tf.ones_like(targets_stripped),
            tf.ones_like(SEP_ID, dtype=tf.int64)
        ],
                               axis=0)
        segment_id = pad_and_cut(segment_id, FLAGS.max_seq_length)
        segment_ids.append(segment_id)
        new_input = tf.concat(
            [CLS_ID, context_gathered, SEP_ID, targets_stripped, SEP_ID],
            axis=0)

        input_mask = tf.ones_like(new_input)
        input_mask = pad_and_cut(input_mask, FLAGS.max_seq_length)
        input_masks.append(input_mask)
        padded_new_input = pad_and_cut(new_input, FLAGS.max_seq_length)
        bert_inputs.append(padded_new_input)
    bert_inputs = tf.stack(bert_inputs, axis=0)
    input_masks = tf.stack(input_masks, axis=0)
    segment_ids = tf.stack(segment_ids, axis=0)

    out = Outputs_And_Context(bert_inputs, input_masks, segment_ids,
                              label_types, context_gathered)

    return out
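
Each BERT input built above has the layout [CLS] context [SEP] target [SEP], with segment id 0 over the context half and 1 over the target half, and an all-ones input mask before padding. A tiny plain-Python illustration with made-up token ids:

CLS_ID, SEP_ID = [101], [102]
context = [7, 8, 9]   # made-up context token ids
target = [21, 22]     # made-up target sentence token ids

new_input = CLS_ID + context + SEP_ID + target + SEP_ID
segment_id = [0] * (1 + len(context) + 1) + [1] * (len(target) + 1)
input_mask = [1] * len(new_input)

print(new_input)   # [101, 7, 8, 9, 102, 21, 22, 102]
print(segment_id)  # [0, 0, 0, 0, 0, 1, 1, 1]
print(input_mask)  # [1, 1, 1, 1, 1, 1, 1, 1]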
Example #21
def safe_log(tensor, eps=1e-16):
    is_zero = tf.less(tensor, eps)
    tensor = tf.where(is_zero, tf.ones_like(tensor), tensor)
    tensor = tf.where(is_zero, tf.zeros_like(tensor) - 1e8, tf.log(tensor))
    return tensor
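
As a quick usage check (a sketch assuming the `safe_log` defined above): values above `eps` get an ordinary log, while values at or below `eps` get a large negative constant instead of -inf.

import tensorflow as tf

x = tf.constant([1.0, 0.5, 0.0])
with tf.Session() as sess:
    print(sess.run(safe_log(x)))  # approximately [0.0, -0.693, -1e8]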
Example #22
    def _decode_record(record, name_to_features, vocab_table):
        """Decodes a record to a TensorFlow example."""
        target_example = tf.parse_single_example(record[0], name_to_features)
        target_example = tf.reshape(
            target_example["sents"],
            [FLAGS.max_para_length, FLAGS.max_sent_length])
        # distractor_examples = []
        # for rec in record[1:]:
        #   distractor_examples.append(
        #       tf.reshape(
        #           tf.parse_single_example(rec, name_to_features)["sents"],
        #           [FLAGS.max_para_length, FLAGS.max_sent_length]))
        # This is an unfortunate hack but is necessary to get around a TF error.
        dist0 = tf.reshape(
            tf.parse_single_example(record[1], name_to_features)["sents"],
            [FLAGS.max_para_length, FLAGS.max_sent_length])
        dist1 = tf.reshape(
            tf.parse_single_example(record[2], name_to_features)["sents"],
            [FLAGS.max_para_length, FLAGS.max_sent_length])
        dist2 = tf.reshape(
            tf.parse_single_example(record[3], name_to_features)["sents"],
            [FLAGS.max_para_length, FLAGS.max_sent_length])
        dist3 = tf.reshape(
            tf.parse_single_example(record[4], name_to_features)["sents"],
            [FLAGS.max_para_length, FLAGS.max_sent_length])

        inputs_obj = build_bert_inputs(target_example)

        distractor_obj = build_distractors([dist0, dist1, dist2, dist3],
                                           inputs_obj.context)

        example = {}
        example["input_ids"] = tf.concat(
            [inputs_obj.input_ids, distractor_obj.input_ids], axis=0)
        example["input_mask"] = tf.concat(
            [inputs_obj.input_mask, distractor_obj.input_mask], axis=0)
        example["segment_ids"] = tf.concat(
            [inputs_obj.segment_ids, distractor_obj.segment_ids], axis=0)
        example["label_types"] = inputs_obj.label_types

        # Add masking:
        if add_masking:
            mask_rate = FLAGS.mask_rate
            max_predictions_per_seq = int(
                math.ceil(FLAGS.max_seq_length * mask_rate))
            cls_token = "[CLS]"
            sep_token = "[SEP]"
            mask_token = "[MASK]"
            # pad_token = "[PAD]"
            mask_blacklist = tf.constant([cls_token,
                                          sep_token])  # , pad_token])
            mask_blacklist_ids = tf.to_int32(
                vocab_table.lookup(mask_blacklist))
            mask_token_id = tf.to_int32(
                vocab_table.lookup(tf.constant(mask_token)))
            input_ids = tf.to_int32(example["input_ids"])

            def call_sample_mask_indices(x):
                return ip.sample_mask_indices(x, mask_rate, mask_blacklist_ids,
                                              max_predictions_per_seq)

            mask_indices = tf.map_fn(call_sample_mask_indices,
                                     input_ids,
                                     dtype=tf.int32)

            def call_get_target_tokens(x):
                input_len = tf.shape(input_ids)[-1]
                x_input_id = x[:input_len]
                x_mask_indices = x[input_len:]
                return ip.get_target_tokens_for_apply(x_input_id,
                                                      x_mask_indices)

            map_input = tf.concat([input_ids, mask_indices], -1)
            target_token_ids = tf.map_fn(call_get_target_tokens, map_input)

            def call_apply_masking(x):
                input_len = tf.shape(input_ids)[-1]
                mask_idx_len = tf.shape(mask_indices)[-1]
                x_input_id = x[:input_len]
                x_mask_indices = x[input_len:input_len + mask_idx_len]
                x_target_token_ids = x[input_len + mask_idx_len:]
                return ip.apply_masking(x_input_id, x_target_token_ids,
                                        x_mask_indices, mask_token_id, 1000)

            map_input2 = tf.concat([input_ids, mask_indices, target_token_ids],
                                   -1)
            token_ids_masked = tf.map_fn(call_apply_masking,
                                         tf.to_int64(map_input2))
            target_token_weights = tf.ones_like(target_token_ids,
                                                dtype=tf.float32)
            pad_targets = tf.where(
                tf.equal(target_token_ids, 0),
                tf.ones_like(target_token_ids, dtype=tf.float32),
                tf.zeros_like(target_token_ids, dtype=tf.float32))
            target_token_weights = target_token_weights - pad_targets
            example["target_token_weights"] = target_token_weights
            example["target_token_ids"] = target_token_ids
            example["input_ids"] = token_ids_masked
            example["mask_indices"] = mask_indices

            # Set shape explicitly for TPU
            example["target_token_weights"].set_shape(
                [FLAGS.num_choices, max_predictions_per_seq])
            example["target_token_ids"].set_shape(
                [FLAGS.num_choices, max_predictions_per_seq])
            example["mask_indices"].set_shape(
                [FLAGS.num_choices, max_predictions_per_seq])

        # Set shape explicitly for TPU
        example["input_ids"].set_shape(
            [FLAGS.num_choices, FLAGS.max_seq_length])
        example["input_mask"].set_shape(
            [FLAGS.num_choices, FLAGS.max_seq_length])
        example["segment_ids"].set_shape(
            [FLAGS.num_choices, FLAGS.max_seq_length])
        example["label_types"].set_shape([4])

        example["label_ids"] = tf.scatter_nd(
            tf.reshape(example["label_types"], [4, 1]), tf.range(4), [8])

        # tf.Example only supports tf.int64, but the TPU only supports tf.int32.
        # So cast all int64 to int32.
        for name in list(example.keys()):  # pylint: disable=g-builtin-op
            t = example[name]
            if t.dtype == tf.int64:
                t = tf.to_int32(t)
            example[name] = t

        return example
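
The masking block above relies on helpers from `ip` that are not shown here: sample mask positions while skipping [CLS]/[SEP], remember the original ids as prediction targets, then overwrite the sampled positions with the [MASK] id. The following is only a plain-Python illustration of that idea with assumed token ids, not the original helpers:

import random

MASK_ID = 103                    # assumed [MASK] id, for illustration only
blacklist = {101, 102}           # [CLS], [SEP]
input_ids = [101, 7, 8, 9, 102]  # made-up token ids
mask_rate = 0.4

candidates = [i for i, t in enumerate(input_ids) if t not in blacklist]
num_to_mask = max(1, int(len(input_ids) * mask_rate))
mask_indices = sorted(random.sample(candidates, min(num_to_mask, len(candidates))))

target_token_ids = [input_ids[i] for i in mask_indices]  # what the model must predict
masked_input = list(input_ids)
for i in mask_indices:
    masked_input[i] = MASK_ID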
Example #23
def GAN(X,
        experiment_name,
        NtoGenerate,
        z_size=100,
        g_hidden_size=128,
        d_hidden_size=128,
        alpha=0.01,
        smooth=0.1,
        learning_rate=0.0002,
        epochs=100):  # Should be used on each of the binary classes.

    checkpoint_name = "Checkpoint_" + experiment_name

    # X[np.where(y[:, 1] == 1)]

    # Hyperparameters
    # Size of input image to discriminator
    input_size = 475  # size of each window
    # Size of latent vector to generator; typically 100, though NVIDIA used N equal to the maximum number of channels in the convolutions
    # z_size = 100
    # Sizes of hidden layers in generator and discriminator
    #g_hidden_size = 128
    #d_hidden_size = 128
    # Leak factor for leaky ReLU
    #alpha = 0.01
    # Label smoothing
    #smooth = 0.1

    tf.reset_default_graph()
    # Create our input placeholders
    input_real, input_z = model_inputs(input_size, z_size)

    # Generator network here
    g_model, g_logits = generator(input_z,
                                  input_size,
                                  n_units=g_hidden_size,
                                  reuse=False,
                                  alpha=alpha)
    # g_model is the generator output

    # Disriminator network here
    d_model_real, d_logits_real = discriminator(input_real,
                                                n_units=d_hidden_size,
                                                reuse=False,
                                                alpha=alpha)
    d_model_fake, d_logits_fake = discriminator(g_model,
                                                n_units=d_hidden_size,
                                                reuse=True,
                                                alpha=alpha)

    # Calculate losses
    d_loss_real = tf.reduce_mean(
        tf.nn.sigmoid_cross_entropy_with_logits(
            logits=d_logits_real,
            labels=tf.ones_like(d_logits_real) * (1 - smooth)))
    d_loss_fake = tf.reduce_mean(
        tf.nn.sigmoid_cross_entropy_with_logits(
            logits=d_logits_fake, labels=tf.zeros_like(d_logits_real)))
    d_loss = d_loss_real + d_loss_fake

    g_loss = tf.reduce_mean(
        tf.nn.sigmoid_cross_entropy_with_logits(
            logits=d_logits_fake, labels=tf.ones_like(d_logits_fake)))

    # Get the trainable_variables, split into G and D parts
    t_vars = tf.trainable_variables()
    g_vars = [var for var in t_vars if var.name.startswith('generator')]
    d_vars = [var for var in t_vars if var.name.startswith('discriminator')]

    d_train_opt = tf.train.AdamOptimizer(learning_rate).minimize(
        d_loss, var_list=d_vars)
    g_train_opt = tf.train.AdamOptimizer(learning_rate).minimize(
        g_loss, var_list=g_vars)

    # TRAINING
    batch_size = 100  # TODO: may need to vary with class size (this determines the amount of generated data)

    samples = []
    losses = []
    saver = tf.train.Saver(var_list=g_vars)
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        for e in range(epochs):
            for ii in range(len(X) // batch_size):
                #print(batch_size * ii, batch_size * (ii + 1))
                batch = X[batch_size * ii:batch_size * (ii + 1)]

                # Get images, reshape and rescale to pass to D
                # batch_images = batch[0].reshape((batch_size, 475))

                # The images should be rescaled to be between -1 and 1, as tanh works best. (Rescale back afterwards?)
                batch_images = (batch - np.min(batch)) / (
                    np.max(batch) - np.min(batch)) * (1 - (-1)) + -1
                # When rescaling back:
                # -(np.max(batch)*(-1) - (np.max(batch)*batch_images) - (np.min(batch)*1) + np.min(batch)*batch_images)/(1 - (-1))

                # Sample random noise for G
                batch_z = np.random.uniform(-1, 1, size=(batch_size, z_size))

                # Run optimizers
                _ = sess.run(d_train_opt,
                             feed_dict={
                                 input_real: batch_images,
                                 input_z: batch_z
                             })
                _ = sess.run(g_train_opt, feed_dict={input_z: batch_z})

            # At the end of each epoch, get the losses and print them out
            #train_loss_d = sess.run(d_loss, {input_z: batch_z, input_real: batch_images})
            #train_loss_g = g_loss.eval({input_z: batch_z})

            #print("Epoch {}/{}...".format(e + 1, epochs),
            #      "Discriminator Loss: {:.4f}...".format(train_loss_d),
            #      "Generator Loss: {:.4f}".format(train_loss_g))
            # Save losses to view after training
            #losses.append((train_loss_d, train_loss_g))

            # Might be unnecessary
            # Sample from generator as we're training for viewing afterwards
            sample_z = np.random.uniform(-1, 1, size=(16, z_size))
            gen_samples = sess.run(generator(input_z,
                                             input_size,
                                             n_units=g_hidden_size,
                                             reuse=True),
                                   feed_dict={input_z: sample_z})
            samples.append(gen_samples)
            saver.save(sess, './checkpoints/' + checkpoint_name + '.ckpt')

    # Save training generator samples
    with open('train_samples.pkl', 'wb') as f:
        pkl.dump(samples, f)

    #fig, ax = plt.subplots()
    #losses = np.array(losses)
    #plt.plot(losses.T[0], label='Discriminator')
    #plt.plot(losses.T[1], label='Generator')
    #plt.title("Training Losses")
    #plt.legend()
    #plt.show()

    # Generating the new observations after training:
    saver = tf.train.Saver(var_list=g_vars)
    with tf.Session() as sess:
        saver.restore(sess, './checkpoints/' + checkpoint_name + '.ckpt')
        sample_z = np.random.uniform(-1, 1, size=(NtoGenerate, z_size))
        gen_samples = sess.run(generator(input_z,
                                         input_size,
                                         n_units=g_hidden_size,
                                         reuse=True),
                               feed_dict={input_z: sample_z})

    # Scaling back to normal:
    gen_samples = 1 / 2 * gen_samples[0] * np.max(X) - 1 / 2 * gen_samples[
        0] * np.min(X) + 1 / 2 * np.max(X) + 1 / 2 * np.min(X)

    return gen_samples
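
The batches above are min-max rescaled into [-1, 1] to match the tanh output of the generator, and the commented formula (and the final line of the function) maps generated samples back to the original range. A small NumPy sketch of that forward and inverse mapping (illustrative data):

import numpy as np

batch = np.array([2.0, 4.0, 6.0])
lo, hi = np.min(batch), np.max(batch)

scaled = (batch - lo) / (hi - lo) * 2.0 - 1.0     # map [lo, hi] -> [-1, 1]
restored = (scaled + 1.0) / 2.0 * (hi - lo) + lo  # map back to [lo, hi]

print(scaled)    # [-1.  0.  1.]
print(restored)  # [2. 4. 6.]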
Example #24
            tf.shape(
                tensor_dict[fields.InputDataFields.groundtruth_weights])[0],
            0), lambda: tensor_dict[fields.InputDataFields.groundtruth_weights],
        default_groundtruth_weights)

    if fields.InputDataFields.groundtruth_keypoints in tensor_dict:
      # Set all keypoints that are not labeled to NaN.
      gt_kpt_fld = fields.InputDataFields.groundtruth_keypoints
      gt_kpt_vis_fld = fields.InputDataFields.groundtruth_keypoint_visibilities
      visibilities_tiled = tf.tile(
          tf.expand_dims(tensor_dict[gt_kpt_vis_fld], -1),
          [1, 1, 2])
      tensor_dict[gt_kpt_fld] = tf.where(
          visibilities_tiled,
          tensor_dict[gt_kpt_fld],
          np.nan * tf.ones_like(tensor_dict[gt_kpt_fld]))

    if self._expand_hierarchy_labels:
      input_fields = fields.InputDataFields
      image_classes, image_confidences = self._expand_image_label_hierarchy(
          tensor_dict[input_fields.groundtruth_image_classes],
          tensor_dict[input_fields.groundtruth_image_confidences])
      tensor_dict[input_fields.groundtruth_image_classes] = image_classes
      tensor_dict[input_fields.groundtruth_image_confidences] = (
          image_confidences)

      box_fields = [
          fields.InputDataFields.groundtruth_group_of,
          fields.InputDataFields.groundtruth_is_crowd,
          fields.InputDataFields.groundtruth_difficult,
          fields.InputDataFields.groundtruth_area,
Example #25
0
  def build_train_graph(self,
                        inputs,
                        min_depth,
                        max_depth,
                        num_mpi_planes,
                        learning_rate=0.0002,
                        beta1=0.9,
                        vgg_model_file=None,
                        global_step=0):
    """Construct the training computation graph.

    Args:
      inputs: dictionary of tensors (see 'input_data' below) needed for training
      min_depth: minimum depth for the PSV and MPI planes
      max_depth: maximum depth for the PSV and MPI planes
      num_mpi_planes: number of MPI planes to infer
      learning_rate: learning rate
      beta1: hyperparameter for Adam
      vgg_model_file: path to vgg weights (needed when vgg loss is used)
      global_step: current optimization step
    Returns:
      A train_op to be used for training.
    """
    print("starting to build graph")
    with tf.name_scope("input_size_randomization"):
      dim_choices = tf.constant([[1, 16], [2, 32], [4, 32], [4, 64], [4, 128],
                                 [8, 32], [8, 64], [8, 128]],
                                dtype=tf.int32)
      rand_dim = tf.random_shuffle(dim_choices)[0, :]
      height_div = rand_dim[0]
      width_div = rand_dim[0]
      num_mpi_planes = rand_dim[1]
      tf.summary.scalar("num_mpi_planes", num_mpi_planes)

    with tf.name_scope("setup"):
      mpi_planes = self.inv_depths(min_depth, max_depth, num_mpi_planes)

    with tf.name_scope("input_data"):
      raw_tgt_image = inputs["tgt_image"]
      raw_ref_image = inputs["ref_image"]
      raw_src_images = inputs["src_images"]

      _, img_height, img_width, _ = raw_src_images.get_shape().as_list()
      img_height = img_height // height_div
      img_width = img_width // width_div

      raw_tgt_image = tf.image.convert_image_dtype(
          raw_tgt_image, dtype=tf.float32)
      raw_ref_image = tf.image.convert_image_dtype(
          raw_ref_image, dtype=tf.float32)
      raw_src_images = tf.image.convert_image_dtype(
          raw_src_images, dtype=tf.float32)
      raw_tgt_image = tf.image.resize_area(raw_tgt_image,
                                           [img_height, img_width])
      raw_ref_image = tf.image.resize_area(raw_ref_image,
                                           [img_height, img_width])
      raw_src_images = tf.image.resize_area(raw_src_images,
                                            [img_height, img_width])

      tgt_pose = inputs["tgt_pose"]
      ref_pose = inputs["ref_pose"]
      src_poses = inputs["src_poses"]
      intrinsics = inputs["intrinsics"]

      # Scale intrinsics based on size randomization
      intrinsics = tf.concat([
          intrinsics[:, 0:1, :] / tf.to_float(width_div),
          intrinsics[:, 1:2, :] / tf.to_float(height_div), intrinsics[:, 2:3, :]
      ],
                             axis=1)
      inputs["intrinsics"] = intrinsics

      _, num_source, _, _ = src_poses.get_shape().as_list()

    with tf.name_scope("inference"):
      print("setting up MPI inference")
      num_mpi_planes = tf.shape(mpi_planes)[0]
      pred = self.infer_mpi(raw_src_images, raw_ref_image, ref_pose, src_poses,
                            intrinsics, num_mpi_planes,
                            mpi_planes)
      rgba_layers = pred["rgba_layers"]
      rgba_layers_refine = pred["rgba_layers_refine"]
      stuff_behind = pred["stuff_behind"]
      refine_input_mpi = pred["refine_input_mpi"]
      psv = pred["psv"]

    with tf.name_scope("synthesis"):
      print("setting up rendering")
      rel_pose = tf.matmul(tgt_pose, tf.matrix_inverse(ref_pose))
      output_image, output_layers = self.mpi_render_view(
          rgba_layers, rel_pose, mpi_planes, intrinsics)
      output_alpha = output_layers[Ellipsis, -1]
      output_image_refine, _ = self.mpi_render_view(
          rgba_layers_refine, rel_pose, mpi_planes, intrinsics)

    with tf.name_scope("loss"):
      print("computing losses")
      # Mask loss for pixels outside reference frustum
      loss_mask = tf.where(
          tf.equal(
              tf.reduce_min(
                  tf.abs(tf.reduce_sum(output_layers, axis=-1)),
                  axis=3,
                  keep_dims=True), 0.0),
          tf.zeros_like(output_alpha[:, :, :, 0:1]),
          tf.ones_like(output_alpha[:, :, :, 0:1]))
      loss_mask = tf.stop_gradient(loss_mask)
      tf.summary.image("loss_mask", loss_mask)

      # Helper functions for loss
      def compute_error(real, fake, mask):
        return tf.reduce_mean(mask * tf.abs(fake - real))

      # Normalized VGG loss (from
      # https://github.com/CQFIO/PhotographicImageSynthesis)

      downsample = lambda tensor, ds: tf.nn.avg_pool(tensor, [1, ds, ds, 1],
                                                     [1, ds, ds, 1], "SAME")

      def vgg_loss(raw_tgt_image, output_image, loss_mask):
        """Compute VGG loss."""

        vgg_real = build_vgg19(raw_tgt_image * 255.0, vgg_model_file)
        rescaled_output_image = (output_image + 1.)/2. * 255.0
        vgg_fake = build_vgg19(
            rescaled_output_image, vgg_model_file, reuse=True)
        p0 = compute_error(vgg_real["input"], vgg_fake["input"], loss_mask)
        p1 = compute_error(vgg_real["conv1_2"],
                           vgg_fake["conv1_2"],
                           loss_mask)/2.6
        p2 = compute_error(vgg_real["conv2_2"],
                           vgg_fake["conv2_2"],
                           downsample(loss_mask, 2))/4.8
        p3 = compute_error(vgg_real["conv3_2"],
                           vgg_fake["conv3_2"],
                           downsample(loss_mask, 4))/3.7
        p4 = compute_error(vgg_real["conv4_2"],
                           vgg_fake["conv4_2"],
                           downsample(loss_mask, 8))/5.6
        p5 = compute_error(vgg_real["conv5_2"],
                           vgg_fake["conv5_2"],
                           downsample(loss_mask, 16))*10/1.5
        total_loss = p0+p1+p2+p3+p4+p5
        return total_loss, vgg_real, vgg_fake

      vgg_loss_initial, _, _ = vgg_loss(raw_tgt_image, output_image, loss_mask)
      tf.summary.scalar("vgg_loss_initial", vgg_loss_initial)
      total_loss = vgg_loss_initial

      vgg_loss_refine, _, _ = vgg_loss(raw_tgt_image, output_image_refine,
                                       loss_mask)
      tf.summary.scalar("vgg_loss_refine", vgg_loss_refine)
      total_loss += vgg_loss_refine

    with tf.name_scope("train_op"):
      print("setting up train op")
      train_vars = [var for var in tf.trainable_variables()]
      optim = tf.train.AdamOptimizer(learning_rate, beta1)
      grads_and_vars = optim.compute_gradients(total_loss, var_list=train_vars)
      train_op = [optim.apply_gradients(grads_and_vars)]

    # Summaries
    tf.summary.scalar("total_loss", total_loss)
    # Source images
    for i in range(num_source):
      src_image = raw_src_images[:, :, :, i*3:(i+1)*3]
      tf.summary.image("src_image_%d" % i, src_image)
    # Output image
    tf.summary.image("output_image", self.deprocess_image(output_image))
    # Refined output image
    tf.summary.image("output_image_refine",
                     self.deprocess_image(output_image_refine))
    # Target image
    tf.summary.image("tgt_image", raw_tgt_image)
    # Ref image
    tf.summary.image("ref_image", raw_ref_image)
    # Predicted color and alpha layers, and PSV
    num_summ = 16  # Number of plane summaries to show in tensorboard
    for i in range(num_summ):
      ind = tf.to_int32(i * num_mpi_planes/num_summ)
      rgb = rgba_layers[:, :, :, ind, :3]
      alpha = rgba_layers[:, :, :, ind, -1:]
      ref_plane = psv[:, :, :, ind, 3:6]
      source_plane = psv[:, :, :, ind, :3]
      output_rgb = output_layers[:, :, :, ind, :3]
      tf.summary.image("rgb_layer_%d" % i, self.deprocess_image(rgb))
      tf.summary.image("alpha_layer_%d" % i, alpha)
      tf.summary.image("rgba_layer_%d" % i, self.deprocess_image(rgb * alpha))
      tf.summary.image("psv_avg_%d" % i,
                       (self.deprocess_image(0.5*ref_plane + 0.5*source_plane)))
      tf.summary.image("output_rgb_%d" % i,
                       self.deprocess_image(output_rgb))
      tf.summary.image("psv_ref_%d" % i, self.deprocess_image(ref_plane))
      tf.summary.image("psv_source_%d" % i, self.deprocess_image(source_plane))

    # Cumulative rendered images and refined MPI
    for i in range(num_summ):
      ind = tf.to_int32(i * num_mpi_planes/num_summ)
      rgb = rgba_layers_refine[:, :, :, ind, :3]
      alpha = rgba_layers_refine[:, :, :, ind, 3:]
      render = stuff_behind[:, :, :, ind, :3]
      input_colors = refine_input_mpi[:, :, :, ind, :3]
      tf.summary.image("rgb_layer_refine_%d" % i, self.deprocess_image(rgb))
      tf.summary.image("alpha_layer_refine_%d" % i, alpha)
      tf.summary.image("rgba_layer_refine_%d" % i,
                       self.deprocess_image(rgb * alpha))
      tf.summary.image("cumulative_render_%d" % i, self.deprocess_image(render))
      tf.summary.image("input_colors_refine_%d" % i,
                       self.deprocess_image(input_colors))

    return train_op
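
The inv_depths helper called in the "setup" scope above is not shown in this example. As a hedged sketch only: MPI-style pipelines commonly space the planes uniformly in inverse depth (disparity) between the near and far plane, roughly as below; the exact ordering and return type used by this code are assumptions.

import numpy as np

def inv_depths_sketch(min_depth, max_depth, num_planes):
    # Depths whose reciprocals are evenly spaced between 1/max_depth and
    # 1/min_depth, returned back-to-front (far plane first).
    inv = np.linspace(1.0 / max_depth, 1.0 / min_depth, num_planes)
    return (1.0 / inv)[::-1]
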
Example #26
0
def get_losses(pointclouds_pl, end_points, dir_labels_pc_cam, offset_labels_pc,
               grasp_success_labels_pc, approach_labels_pc_cam, global_config):
    """
    Computes loss terms from point clouds, network predictions and labels.

    Arguments:
        pointclouds_pl {tf.placeholder} -- bxNx3 input point clouds
        end_points {dict[str:tf.variable]} -- endpoints of the network containing predictions
        dir_labels_pc_cam {tf.variable} -- base direction labels in camera coordinates (bxNx3)
        offset_labels_pc {tf.variable} -- grasp width labels (bxNx1) 
        grasp_success_labels_pc {tf.variable} -- contact success labels (bxNx1) 
        approach_labels_pc_cam {tf.variable} -- approach direction labels in camera coordinates (bxNx3)
        global_config {dict} -- config dict 
        
    Returns:
        [dir_cosine_loss, bin_ce_loss, offset_loss, approach_cosine_loss, adds_loss,
        adds_loss_gt2pred] -- All losses (not all are used for training)
    """

    grasp_dir_head = end_points['grasp_dir_head']
    grasp_offset_head = end_points['grasp_offset_head']
    approach_dir_head = end_points['approach_dir_head']

    bin_weights = global_config['DATA']['labels']['bin_weights']
    tf_bin_weights = tf.constant(bin_weights)

    min_geom_loss_divisor = tf.constant(
        float(global_config['LOSS']['min_geom_loss_divisor'])
    ) if 'min_geom_loss_divisor' in global_config['LOSS'] else tf.constant(1.)
    pos_grasps_in_view = tf.math.maximum(
        tf.reduce_sum(grasp_success_labels_pc, axis=1), min_geom_loss_divisor)

    ### ADS Gripper PC Loss
    if global_config['MODEL']['bin_offsets']:
        thickness_pred = tf.gather_nd(
            get_bin_vals(global_config),
            tf.expand_dims(tf.argmax(grasp_offset_head, axis=2), axis=2))
        thickness_gt = tf.gather_nd(
            get_bin_vals(global_config),
            tf.expand_dims(tf.argmax(offset_labels_pc, axis=2), axis=2))
    else:
        thickness_pred = grasp_offset_head[:, :, 0]
        thickness_gt = offset_labels_pc[:, :, 0]
    pred_grasps = build_6d_grasp(approach_dir_head,
                                 grasp_dir_head,
                                 pointclouds_pl,
                                 thickness_pred,
                                 use_tf=True)  # b x num_point x 4 x 4
    gt_grasps_proj = build_6d_grasp(approach_labels_pc_cam,
                                    dir_labels_pc_cam,
                                    pointclouds_pl,
                                    thickness_gt,
                                    use_tf=True)  # b x num_point x 4 x 4
    pos_gt_grasps_proj = tf.where(
        tf.broadcast_to(
            tf.expand_dims(
                tf.expand_dims(tf.cast(grasp_success_labels_pc, tf.bool), 2),
                3), gt_grasps_proj.shape), gt_grasps_proj,
        tf.ones_like(gt_grasps_proj) * 100000)
    # pos_gt_grasps_proj = tf.reshape(pos_gt_grasps_proj, (global_config['OPTIMIZER']['batch_size'], -1, 4, 4))

    gripper = mesh_utils.create_gripper('panda')
    gripper_control_points = gripper.get_control_point_tensor(
        global_config['OPTIMIZER']['batch_size'])  # b x 5 x 3
    sym_gripper_control_points = gripper.get_control_point_tensor(
        global_config['OPTIMIZER']['batch_size'], symmetric=True)

    gripper_control_points_homog = tf.concat([
        gripper_control_points,
        tf.ones((global_config['OPTIMIZER']['batch_size'],
                 gripper_control_points.shape[1], 1))
    ],
                                             axis=2)  # b x 5 x 4
    sym_gripper_control_points_homog = tf.concat([
        sym_gripper_control_points,
        tf.ones((global_config['OPTIMIZER']['batch_size'],
                 gripper_control_points.shape[1], 1))
    ],
                                                 axis=2)  # b x 5 x 4

    # only use per point pred grasps but not per point gt grasps
    control_points = tf.keras.backend.repeat_elements(
        tf.expand_dims(gripper_control_points_homog, 1),
        gt_grasps_proj.shape[1],
        axis=1)  # b x num_point x 5 x 4
    sym_control_points = tf.keras.backend.repeat_elements(
        tf.expand_dims(sym_gripper_control_points_homog, 1),
        gt_grasps_proj.shape[1],
        axis=1)  # b x num_point x 5 x 4
    pred_control_points = tf.matmul(
        control_points,
        tf.transpose(pred_grasps,
                     perm=[0, 1, 3, 2]))[:, :, :, :3]  #  b x num_point x 5 x 3

    ### Pred Grasp to GT Grasp ADD-S Loss
    gt_control_points = tf.matmul(
        control_points,
        tf.transpose(pos_gt_grasps_proj,
                     perm=[0, 1, 3, 2
                           ]))[:, :, :, :3]  #  b x num_pos_grasp_point x 5 x 3
    sym_gt_control_points = tf.matmul(
        sym_control_points,
        tf.transpose(pos_gt_grasps_proj,
                     perm=[0, 1, 3, 2
                           ]))[:, :, :, :3]  #  b x num_pos_grasp_point x 5 x 3

    squared_add = tf.reduce_sum(
        (tf.expand_dims(pred_control_points, 2) -
         tf.expand_dims(gt_control_points, 1))**2,
        axis=(3, 4))  # b x num_point x num_pos_grasp_point x ( 5 x 3)
    sym_squared_add = tf.reduce_sum(
        (tf.expand_dims(pred_control_points, 2) -
         tf.expand_dims(sym_gt_control_points, 1))**2,
        axis=(3, 4))  # b x num_point x num_pos_grasp_point x ( 5 x 3)

    # symmetric ADD-S
    neg_squared_adds = -tf.concat(
        [squared_add, sym_squared_add],
        axis=2)  # b x num_point x 2num_pos_grasp_point
    neg_squared_adds_k = tf.math.top_k(neg_squared_adds, k=1,
                                       sorted=False)[0]  # b x num_point
    # If any pos grasp exists
    min_adds = tf.minimum(
        tf.reduce_sum(grasp_success_labels_pc, axis=1, keepdims=True),
        tf.ones_like(neg_squared_adds_k[:, :, 0])
    ) * tf.sqrt(
        -neg_squared_adds_k[:, :, 0]
    )  #tf.minimum(tf.sqrt(-neg_squared_adds_k), tf.ones_like(neg_squared_adds_k)) # b x num_point
    adds_loss = tf.reduce_mean(end_points['binary_seg_pred'][:, :, 0] *
                               min_adds)

    ### GT Grasp to pred Grasp ADD-S Loss
    gt_control_points = tf.matmul(
        control_points,
        tf.transpose(gt_grasps_proj,
                     perm=[0, 1, 3, 2
                           ]))[:, :, :, :3]  #  b x num_pos_grasp_point x 5 x 3
    sym_gt_control_points = tf.matmul(
        sym_control_points,
        tf.transpose(gt_grasps_proj,
                     perm=[0, 1, 3, 2
                           ]))[:, :, :, :3]  #  b x num_pos_grasp_point x 5 x 3

    neg_squared_adds = -tf.reduce_sum(
        (tf.expand_dims(pred_control_points, 1) -
         tf.expand_dims(gt_control_points, 2))**2,
        axis=(3, 4))  # b x num_point x num_pos_grasp_point x ( 5 x 3)
    neg_squared_adds_sym = -tf.reduce_sum(
        (tf.expand_dims(pred_control_points, 1) -
         tf.expand_dims(sym_gt_control_points, 2))**2,
        axis=(3, 4))  # b x num_point x num_pos_grasp_point x ( 5 x 3)

    neg_squared_adds_k_gt2pred, pred_grasp_idcs = tf.math.top_k(
        neg_squared_adds, k=1, sorted=False)  # b x num_pos_grasp_point
    neg_squared_adds_k_sym_gt2pred, pred_grasp_sym_idcs = tf.math.top_k(
        neg_squared_adds_sym, k=1, sorted=False)  # b x num_pos_grasp_point
    pred_grasp_idcs_joined = tf.where(
        neg_squared_adds_k_gt2pred < neg_squared_adds_k_sym_gt2pred,
        pred_grasp_sym_idcs, pred_grasp_idcs)
    min_adds_gt2pred = tf.minimum(
        -neg_squared_adds_k_gt2pred,
        -neg_squared_adds_k_sym_gt2pred)  # b x num_pos_grasp_point x 1
    # min_adds_gt2pred = tf.math.exp(-min_adds_gt2pred)
    masked_min_adds_gt2pred = tf.multiply(min_adds_gt2pred[:, :, 0],
                                          grasp_success_labels_pc)

    batch_idcs = tf.meshgrid(tf.range(pred_grasp_idcs_joined.shape[1]),
                             tf.range(pred_grasp_idcs_joined.shape[0]))
    gather_idcs = tf.stack((batch_idcs[1], pred_grasp_idcs_joined[:, :, 0]),
                           axis=2)
    nearest_pred_grasp_confidence = tf.gather_nd(
        end_points['binary_seg_pred'][:, :, 0], gather_idcs)
    adds_loss_gt2pred = tf.reduce_mean(
        tf.reduce_sum(nearest_pred_grasp_confidence * masked_min_adds_gt2pred,
                      axis=1) / pos_grasps_in_view)

    ### Grasp baseline Loss
    cosine_distance = tf.constant(1.) - tf.reduce_sum(
        tf.multiply(dir_labels_pc_cam, grasp_dir_head), axis=2)
    # only pass loss where we have labeled contacts near pc points
    masked_cosine_loss = tf.multiply(cosine_distance, grasp_success_labels_pc)
    dir_cosine_loss = tf.reduce_mean(
        tf.reduce_sum(masked_cosine_loss, axis=1) / pos_grasps_in_view)

    ### Grasp Approach Loss
    approach_labels_orthog = tf.math.l2_normalize(
        approach_labels_pc_cam -
        tf.reduce_sum(tf.multiply(grasp_dir_head, approach_labels_pc_cam),
                      axis=2,
                      keepdims=True) * grasp_dir_head,
        axis=2)
    cosine_distance_approach = tf.constant(1.) - tf.reduce_sum(
        tf.multiply(approach_labels_orthog, approach_dir_head), axis=2)
    masked_approach_loss = tf.multiply(cosine_distance_approach,
                                       grasp_success_labels_pc)
    approach_cosine_loss = tf.reduce_mean(
        tf.reduce_sum(masked_approach_loss, axis=1) / pos_grasps_in_view)

    ### Grasp Offset/Thickness Loss
    if global_config['MODEL']['bin_offsets']:
        if global_config['LOSS'][
                'offset_loss_type'] == 'softmax_cross_entropy':
            offset_loss = tf.losses.softmax_cross_entropy(
                offset_labels_pc, grasp_offset_head)
        else:
            offset_loss = tf.nn.sigmoid_cross_entropy_with_logits(
                labels=offset_labels_pc, logits=grasp_offset_head)

            if 'too_small_offset_pred_bin_factor' in global_config[
                    'LOSS'] and global_config['LOSS'][
                        'too_small_offset_pred_bin_factor']:
                too_small_offset_pred_bin_factor = tf.constant(
                    global_config['LOSS']['too_small_offset_pred_bin_factor'],
                    tf.float32)
                collision_weight = tf.math.cumsum(
                    offset_labels_pc, axis=2, reverse=True
                ) * too_small_offset_pred_bin_factor + tf.constant(1.)
                offset_loss = tf.multiply(collision_weight, offset_loss)

            offset_loss = tf.reduce_mean(tf.multiply(
                tf.reshape(tf_bin_weights, (1, 1, -1)), offset_loss),
                                         axis=2)
    else:
        offset_loss = (grasp_offset_head[:, :, 0] -
                       offset_labels_pc[:, :, 0])**2
    masked_offset_loss = tf.multiply(offset_loss, grasp_success_labels_pc)
    offset_loss = tf.reduce_mean(
        tf.reduce_sum(masked_offset_loss, axis=1) / pos_grasps_in_view)

    ### Grasp Confidence Loss
    bin_ce_loss = tf.nn.sigmoid_cross_entropy_with_logits(
        labels=tf.expand_dims(grasp_success_labels_pc, axis=2),
        logits=end_points['binary_seg_head'])
    if 'topk_confidence' in global_config['LOSS'] and global_config['LOSS'][
            'topk_confidence']:
        bin_ce_loss, _ = tf.math.top_k(
            tf.squeeze(bin_ce_loss),
            k=global_config['LOSS']['topk_confidence'])
    bin_ce_loss = tf.reduce_mean(bin_ce_loss)

    return dir_cosine_loss, bin_ce_loss, offset_loss, approach_cosine_loss, adds_loss, adds_loss_gt2pred
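
build_6d_grasp is used above to assemble b x N x 4 x 4 grasp poses from an approach direction, a baseline (contact) direction, a contact point and a grasp width, but its body is not included in this example. The following is only a rough sketch of such a construction; the axis conventions and the placement of the grasp center are assumptions, not this repository's code.

import numpy as np

def build_6d_grasp_sketch(approach_dir, base_dir, contact_pt, width):
    # Single grasp, for illustration: the rotation columns are the baseline
    # axis, their cross product, and the approach axis; the grasp center is
    # assumed to sit half a grasp width along the baseline from the contact.
    a = approach_dir / np.linalg.norm(approach_dir)
    b = base_dir / np.linalg.norm(base_dir)
    grasp = np.eye(4)
    grasp[:3, :3] = np.stack([b, np.cross(a, b), a], axis=1)
    grasp[:3, 3] = contact_pt + 0.5 * width * b
    return grasp
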
Example #27
0
    def evaluate(self, params, tpts):
        return params[0] * tf.ones_like(tpts)
Example #28
0
def compute_labels(pos_contact_pts_mesh, pos_contact_dirs_mesh,
                   pos_contact_approaches_mesh, pos_finger_diffs, pc_cam_pl,
                   camera_pose_pl, global_config):
    """
    Project grasp labels defined on meshes onto the rendered point cloud from a camera pose via nearest-neighbor contacts within a maximum radius.
    All points without nearby successful grasp contacts are considered negative contact points.

    Arguments:
        pos_contact_pts_mesh {tf.constant} -- positive contact points on the mesh scene (Mx3)
        pos_contact_dirs_mesh {tf.constant} -- respective contact base directions in the mesh scene (Mx3)
        pos_contact_approaches_mesh {tf.constant} -- respective contact approach directions in the mesh scene (Mx3)
        pos_finger_diffs {tf.constant} -- respective grasp widths in the mesh scene (Mx1)
        pc_cam_pl {tf.placeholder} -- bxNx3 rendered point clouds
        camera_pose_pl {tf.placeholder} -- bx4x4 camera poses
        global_config {dict} -- global config

    Returns:
        [dir_labels_pc_cam, offset_labels_pc, grasp_success_labels_pc, approach_labels_pc_cam] -- Per-point contact success labels and per-contact pose labels in rendered point cloud
    """
    label_config = global_config['DATA']['labels']
    model_config = global_config['MODEL']

    nsample = label_config['k']
    radius = label_config['max_radius']
    filter_z = label_config['filter_z']
    z_val = label_config['z_val']

    xyz_cam = pc_cam_pl[:, :, :3]
    pad_homog = tf.ones((xyz_cam.shape[0], xyz_cam.shape[1], 1))
    pc_mesh = tf.matmul(
        tf.concat([xyz_cam, pad_homog], 2),
        tf.transpose(tf.linalg.inv(camera_pose_pl), perm=[0, 2, 1]))[:, :, :3]

    contact_point_offsets_batch = tf.keras.backend.repeat_elements(
        tf.expand_dims(pos_finger_diffs, 0), pc_mesh.shape[0], axis=0)

    pad_homog2 = tf.ones((pc_mesh.shape[0], pos_contact_dirs_mesh.shape[0], 1))
    contact_point_dirs_batch = tf.keras.backend.repeat_elements(
        tf.expand_dims(pos_contact_dirs_mesh, 0), pc_mesh.shape[0], axis=0)
    contact_point_dirs_batch_cam = tf.matmul(
        contact_point_dirs_batch,
        tf.transpose(camera_pose_pl[:, :3, :3], perm=[0, 2, 1]))[:, :, :3]

    pos_contact_approaches_batch = tf.keras.backend.repeat_elements(
        tf.expand_dims(pos_contact_approaches_mesh, 0),
        pc_mesh.shape[0],
        axis=0)
    pos_contact_approaches_batch_cam = tf.matmul(
        pos_contact_approaches_batch,
        tf.transpose(camera_pose_pl[:, :3, :3], perm=[0, 2, 1]))[:, :, :3]

    contact_point_batch_mesh = tf.keras.backend.repeat_elements(
        tf.expand_dims(pos_contact_pts_mesh, 0), pc_mesh.shape[0], axis=0)
    contact_point_batch_cam = tf.matmul(
        tf.concat([contact_point_batch_mesh, pad_homog2], 2),
        tf.transpose(camera_pose_pl, perm=[0, 2, 1]))[:, :, :3]

    if filter_z:
        dir_filter_passed = tf.keras.backend.repeat_elements(tf.math.greater(
            contact_point_dirs_batch_cam[:, :, 2:3], tf.constant([z_val])),
                                                             3,
                                                             axis=2)
        contact_point_batch_mesh = tf.where(
            dir_filter_passed, contact_point_batch_mesh,
            tf.ones_like(contact_point_batch_mesh) * 100000)

    squared_dists_all = tf.reduce_sum(
        (tf.expand_dims(contact_point_batch_cam, 1) -
         tf.expand_dims(xyz_cam, 2))**2,
        axis=3)
    neg_squared_dists_k, close_contact_pt_idcs = tf.math.top_k(
        -squared_dists_all, k=nsample, sorted=False)
    squared_dists_k = -neg_squared_dists_k

    # Nearest neighbor mapping
    grasp_success_labels_pc = tf.cast(
        tf.less(tf.reduce_mean(squared_dists_k, axis=2), radius * radius),
        tf.float32)  # (batch_size, num_point)

    grouped_dirs_pc_cam = group_point(contact_point_dirs_batch_cam,
                                      close_contact_pt_idcs)
    grouped_approaches_pc_cam = group_point(pos_contact_approaches_batch_cam,
                                            close_contact_pt_idcs)
    grouped_offsets = group_point(
        tf.expand_dims(contact_point_offsets_batch, 2), close_contact_pt_idcs)

    dir_labels_pc_cam = tf.math.l2_normalize(
        tf.reduce_mean(grouped_dirs_pc_cam,
                       axis=2), axis=2)  # (batch_size, num_point, 3)
    approach_labels_pc_cam = tf.math.l2_normalize(
        tf.reduce_mean(grouped_approaches_pc_cam,
                       axis=2), axis=2)  # (batch_size, num_point, 3)
    offset_labels_pc = tf.reduce_mean(grouped_offsets, axis=2)

    return dir_labels_pc_cam, offset_labels_pc, grasp_success_labels_pc, approach_labels_pc_cam
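
group_point above is assumed to be the PointNet++ grouping op: it gathers, for every point in the rendered cloud, the features of its k nearest contact indices. If that assumption holds, its effect can be reproduced with a plain batched gather:

def group_point_sketch(features, idx):
    # features: (B, M, C) per-contact features, idx: (B, N, k) neighbor indices
    # returns:  (B, N, k, C) features of the k selected contacts per point
    return tf.gather(features, idx, batch_dims=1)
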
Example #29
0
    def meta_optimize(self):
        """Meta optimization step."""

        probe_images, probe_labels = self.probe_images, self.probe_labels
        labels = self.labels
        net = self.net
        logits = self.logits
        gate_gradients = 1

        batch_size = int(self.batch_size / self.strategy.num_replicas_in_sync)
        init_eps_val = float(1) / batch_size

        meta_net = networks.MetaImage(self.net, name='meta_model')

        if FLAGS.meta_momentum and not self.optimizer.variables():
            # Initialize the optimizer's momentum slots so the meta momentum
            # update below can read them; this is a workaround.
            logging.info('Pre-initialize optimizer momentum states.')
            idle_net_cost = tf.losses.sparse_softmax_cross_entropy(
                self.labels, logits)
            tmp_var_grads = self.optimizer.compute_gradients(
                tf.reduce_mean(idle_net_cost), net.trainable_variables)
            self.optimizer.apply_gradients(tmp_var_grads)

        with tf.name_scope('coefficient'):
            # Data weight coefficient
            target = tf.constant([init_eps_val] * batch_size,
                                 shape=(batch_size, ),
                                 dtype=np.float32,
                                 name='weight')
            # Data re-labeling coefficient
            eps = tf.constant([FLAGS.grad_eps_init] * batch_size,
                              shape=(batch_size, ),
                              dtype=tf.float32,
                              name='eps')

        onehot_labels = tf.one_hot(labels, self.dataset.num_classes)
        onehot_labels = tf.cast(onehot_labels, tf.float32)
        eps_k = tf.reshape(eps, [batch_size, 1])

        mixed_labels = eps_k * onehot_labels + (1 - eps_k) * self.guessed_label
        # raw softmax loss
        log_softmax = tf.nn.log_softmax(logits)
        net_cost = -tf.reduce_sum(mixed_labels * log_softmax, 1)

        lookahead_loss = tf.reduce_sum(tf.multiply(target, net_cost))
        lookahead_loss = lookahead_loss + net.regularization_loss

        with tf.control_dependencies([lookahead_loss]):
            train_vars = net.trainable_variables
            var_grads = tf.gradients(lookahead_loss,
                                     train_vars,
                                     gate_gradients=gate_gradients)

            static_vars = []
            for i in range(len(train_vars)):
                if FLAGS.meta_momentum > 0:
                    actual_grad = self.meta_momentum_update(
                        var_grads[i], train_vars[i].name, self.optimizer)
                    static_vars.append(
                        tf.math.subtract(train_vars[i],
                                         FLAGS.meta_stepsize * actual_grad))
                else:
                    static_vars.append(
                        tf.math.subtract(train_vars[i],
                                         FLAGS.meta_stepsize * var_grads[i]))
                # new style
                meta_net.add_variable_alias(static_vars[-1],
                                            var_name=train_vars[i].name)

            for uv in net.updates_variables:
                meta_net.add_variable_alias(uv,
                                            var_name=uv.name,
                                            var_type='updates_variables')
            meta_net.verbose()

        with tf.control_dependencies(static_vars):
            g_logits = meta_net(probe_images,
                                name='meta_model',
                                reuse=True,
                                training=True)

            desired_y = tf.one_hot(probe_labels, self.dataset.num_classes)
            meta_loss = tf.nn.softmax_cross_entropy_with_logits_v2(
                desired_y, g_logits)
            meta_loss = tf.reduce_mean(meta_loss, name='meta_loss')
            meta_loss = meta_loss + meta_net.get_regularization_loss(net.wd)
            meta_acc, meta_acc_op = tf.metrics.accuracy(
                probe_labels, tf.argmax(g_logits, axis=1))

        with tf.control_dependencies([meta_loss] + [meta_acc_op]):
            meta_train_vars = meta_net.trainable_variables
            grad_meta_vars = tf.gradients(meta_loss,
                                          meta_train_vars,
                                          gate_gradients=gate_gradients)
            grad_target, grad_eps = tf.gradients(static_vars, [target, eps],
                                                 grad_ys=grad_meta_vars,
                                                 gate_gradients=gate_gradients)
        # Update the per-example weights from the meta-gradient
        raw_weight = target - grad_target
        raw_weight = raw_weight - init_eps_val
        unorm_weight = tf.clip_by_value(raw_weight,
                                        clip_value_min=0,
                                        clip_value_max=float('inf'))
        norm_c = tf.reduce_sum(unorm_weight)
        weight = tf.divide(unorm_weight, norm_c + 0.00001)

        # Get the new label-mixing coefficient eps from the sign of its gradient
        new_eps = tf.where(grad_eps < 0,
                           x=tf.ones_like(eps),
                           y=tf.zeros_like(eps))

        return tf.stop_gradient(weight), tf.stop_gradient(
            new_eps), meta_loss, meta_acc
Example #30
0
def crop_mask_in_target_box(masks,
                            boxes,
                            target_boxes,
                            output_size,
                            sample_offset=0,
                            use_einsum=True):
    """Crop masks in target boxes.

  Args:
    masks: A tensor with a shape of [batch_size, num_masks, height, width].
    boxes: a float tensor representing box coordinates that tightly enclose
      masks with a shape of [batch_size, num_masks, 4] in un-normalized
      coordinates. A box is represented by [ymin, xmin, ymax, xmax].
    target_boxes: a float tensor representing target box coordinates for masks
      with a shape of [batch_size, num_masks, 4] in un-normalized coordinates. A
      box is represented by [ymin, xmin, ymax, xmax].
    output_size: A scalar indicating the output crop size. Only square output
      crops are currently supported.
    sample_offset: a float in [0, 1] indicating the subpixel sample offset from
      the grid point.
    use_einsum: Use einsum to replace gather in selective_crop_and_resize.

  Returns:
    A 4-D tensor representing feature crop of shape
    [batch_size, num_boxes, output_size, output_size].
  """
    with tf.name_scope('crop_mask_in_target_box'):
        batch_size, num_masks, height, width = masks.get_shape().as_list()
        masks = tf.reshape(masks, [batch_size * num_masks, height, width, 1])
        # Pad zeros on the boundary of masks.
        masks = tf.image.pad_to_bounding_box(masks, 2, 2, height + 4,
                                             width + 4)
        masks = tf.reshape(masks,
                           [batch_size, num_masks, height + 4, width + 4, 1])

        # Projects target box locations and sizes to corresponding cropped
        # mask coordinates.
        gt_y_min, gt_x_min, gt_y_max, gt_x_max = tf.split(value=boxes,
                                                          num_or_size_splits=4,
                                                          axis=2)
        bb_y_min, bb_x_min, bb_y_max, bb_x_max = tf.split(value=target_boxes,
                                                          num_or_size_splits=4,
                                                          axis=2)
        y_transform = (bb_y_min - gt_y_min) * height / (gt_y_max - gt_y_min +
                                                        _EPSILON) + 2
        x_transform = (bb_x_min - gt_x_min) * height / (gt_x_max - gt_x_min +
                                                        _EPSILON) + 2
        h_transform = (bb_y_max - bb_y_min) * width / (gt_y_max - gt_y_min +
                                                       _EPSILON)
        w_transform = (bb_x_max - bb_x_min) * width / (gt_x_max - gt_x_min +
                                                       _EPSILON)

        boundaries = tf.concat([
            tf.to_float(tf.ones_like(y_transform) * ((height + 4) - 1)),
            tf.to_float(tf.ones_like(x_transform) * ((width + 4) - 1))
        ],
                               axis=-1)

        # Reshape tensors to have the right shape for selective_crop_and_resize.
        transformed_boxes = tf.concat(
            [y_transform, x_transform, h_transform, w_transform], -1)
        levels = tf.tile(tf.reshape(tf.range(num_masks), [1, num_masks]),
                         [batch_size, 1])

        cropped_masks = selective_crop_and_resize(masks,
                                                  transformed_boxes,
                                                  levels,
                                                  boundaries,
                                                  output_size,
                                                  sample_offset=sample_offset,
                                                  use_einsum_gather=use_einsum)
        cropped_masks = tf.squeeze(cropped_masks, axis=-1)

    return cropped_masks
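
A small usage sketch for the function above, with toy shapes only; the box values are illustrative, and selective_crop_and_resize and _EPSILON must be available from the same module.

masks = tf.ones([1, 2, 28, 28])                # [batch, num_masks, h, w]
boxes = tf.constant([[[0., 0., 20., 20.],
                      [4., 4., 24., 24.]]])    # [ymin, xmin, ymax, xmax]
target_boxes = tf.constant([[[2., 2., 18., 18.],
                             [4., 4., 24., 24.]]])
crops = crop_mask_in_target_box(masks, boxes, target_boxes, output_size=14)
# crops has shape [1, 2, 14, 14]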