Example #1
def compute_grid_positions(boxes, boundaries, output_size, sample_offset):
    """Compute the grid position w.r.t.

  the corresponding feature map.

  Args:
    boxes: a 3-D tensor of shape [batch_size, num_boxes, 4] encoding the
      information of each box w.r.t. the corresponding feature map.
      boxes[:, :, 0:2] are the grid position in (y, x) (float) of the top-left
      corner of each box. boxes[:, :, 2:4] are the box sizes in (h, w) (float)
      in terms of the number of pixels of the corresponding feature map size.
    boundaries: a 3-D tensor of shape [batch_size, num_boxes, 2] representing
      the boundary (in (y, x)) of the corresponding feature map for each box.
      Any resampled grid points that go beyond the boundary will be clipped.
    output_size: a scalar indicating the output crop size.
    sample_offset: a float in [0, 1] indicating the subpixel sample offset
      from the grid point.

  Returns:
    kernel_y: Tensor of size [batch_size, num_boxes, output_size, 2, 1].
    kernel_x: Tensor of size [batch_size, num_boxes, output_size, 2, 1].
    box_gridy0y1: Tensor of size [batch_size, num_boxes, output_size, 2].
    box_gridx0x1: Tensor of size [batch_size, num_boxes, output_size, 2].
  """
    batch_size, num_boxes, _ = boxes.get_shape().as_list()
    box_grid_x = []
    box_grid_y = []
    for i in range(output_size):
        box_grid_x.append(boxes[:, :, 1] +
                          (i + sample_offset) * boxes[:, :, 3] / output_size)
        box_grid_y.append(boxes[:, :, 0] +
                          (i + sample_offset) * boxes[:, :, 2] / output_size)
    box_grid_x = tf.stack(box_grid_x, axis=2)
    box_grid_y = tf.stack(box_grid_y, axis=2)

    box_grid_y0 = tf.floor(box_grid_y)
    box_grid_x0 = tf.floor(box_grid_x)
    box_grid_x0 = tf.maximum(0., box_grid_x0)
    box_grid_y0 = tf.maximum(0., box_grid_y0)

    box_grid_x0 = tf.minimum(box_grid_x0,
                             tf.expand_dims(boundaries[:, :, 1], -1))
    box_grid_x1 = tf.minimum(box_grid_x0 + 1,
                             tf.expand_dims(boundaries[:, :, 1], -1))
    box_grid_y0 = tf.minimum(box_grid_y0,
                             tf.expand_dims(boundaries[:, :, 0], -1))
    box_grid_y1 = tf.minimum(box_grid_y0 + 1,
                             tf.expand_dims(boundaries[:, :, 0], -1))

    box_gridx0x1 = tf.stack([box_grid_x0, box_grid_x1], axis=-1)
    box_gridy0y1 = tf.stack([box_grid_y0, box_grid_y1], axis=-1)

    # The RoIAlign feature f can be computed by bilinear interpolation of four
    # neighboring feature points f0, f1, f2, and f3.
    # f(y, x) = [hy, ly] * [[f00, f01], [f10, f11]] * [hx, lx]^T
    # f(y, x) = (hy*hx)f00 + (hy*lx)f01 + (ly*hx)f10 + (lx*ly)f11
    # f(y, x) = w00*f00 + w01*f01 + w10*f10 + w11*f11
    ly = box_grid_y - box_grid_y0
    lx = box_grid_x - box_grid_x0
    hy = 1.0 - ly
    hx = 1.0 - lx
    kernel_y = tf.reshape(tf.stack([hy, ly], axis=3),
                          [batch_size, num_boxes, output_size, 2, 1])
    kernel_x = tf.reshape(tf.stack([hx, lx], axis=3),
                          [batch_size, num_boxes, output_size, 2, 1])
    return kernel_y, kernel_x, box_gridy0y1, box_gridx0x1
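A minimal usage sketch (editor's addition, not from the original source), assuming the TF1-style `tf` import used throughout these snippets; the box and boundary values below are made up for illustration:

import tensorflow.compat.v1 as tf

boxes = tf.constant([[[2.0, 3.0, 8.0, 8.0]]])    # [batch=1, num_boxes=1, 4]
boundaries = tf.constant([[[15.0, 15.0]]])       # [batch=1, num_boxes=1, 2]
kernel_y, kernel_x, grid_y0y1, grid_x0x1 = compute_grid_positions(
    boxes, boundaries, output_size=4, sample_offset=0.5)
# kernel_y / kernel_x have shape [1, 1, 4, 2, 1]; each [hy, ly] / [hx, lx]
# pair holds the bilinear weights of the two neighboring rows / columns.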
Example #2
def lrelu(x, leak=0.2, name="lrelu"):
    # Leaky ReLU: identity for x >= 0, slope `leak` for x < 0.
    return tf.maximum(x, leak * x, name=name)
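As a quick sanity check (editor's addition), max(x, leak*x) with 0 <= leak < 1 reproduces the usual piecewise leaky-ReLU definition:

import numpy as np

x = np.array([-2.0, -0.5, 0.0, 1.5])
leak = 0.2
np.testing.assert_allclose(np.maximum(x, leak * x),
                           np.where(x >= 0, x, leak * x))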
Example #3
    def train_dvrl(self, perf_metric):
        """Trains DVRL based on the specified objective function.

    Args:
      perf_metric: 'auc', 'accuracy', 'log_loss' for classification;
                   'mae', 'mse', 'rmspe' for regression.
    """

        # Generates selected probability
        est_data_value = self.data_value_evaluator()

        # Generator loss (REINFORCE algorithm)
        prob = tf.reduce_sum(
            self.s_input * tf.log(est_data_value + self.epsilon) +
            (1 - self.s_input) * tf.log(1 - est_data_value + self.epsilon))
        dve_loss = (
            -self.reward_input * prob +
            1e3 * (tf.maximum(tf.reduce_mean(est_data_value) -
                              self.threshold, 0) +
                   tf.maximum((1 - self.threshold) -
                              tf.reduce_mean(est_data_value), 0)))

        # Variable
        dve_vars = [v for v in tf.trainable_variables()
                    if v.name.startswith('data_value_estimator')]

        # Solver
        dve_solver = tf.train.AdamOptimizer(self.learning_rate).minimize(
            dve_loss, var_list=dve_vars)

        # Baseline performance
        if self.flag_sgd:
            y_valid_hat = self.ori_model.predict(self.x_valid)
        else:
            if self.problem == 'classification':
                y_valid_hat = self.ori_model.predict_proba(self.x_valid)
            elif self.problem == 'regression':
                y_valid_hat = self.ori_model.predict(self.x_valid)

        if perf_metric == 'auc':
            valid_perf = metrics.roc_auc_score(self.y_valid, y_valid_hat[:, 1])
        elif perf_metric == 'accuracy':
            valid_perf = metrics.accuracy_score(self.y_valid,
                                                np.argmax(y_valid_hat, axis=1))
        elif perf_metric == 'log_loss':
            valid_perf = -metrics.log_loss(self.y_valid, y_valid_hat)
        elif perf_metric == 'rmspe':
            valid_perf = dvrl_metrics.rmspe(self.y_valid, y_valid_hat)
        elif perf_metric == 'mae':
            valid_perf = metrics.mean_absolute_error(self.y_valid, y_valid_hat)
        elif perf_metric == 'mse':
            valid_perf = metrics.mean_squared_error(self.y_valid, y_valid_hat)

        # Prediction differences
        if self.flag_sgd:
            y_train_valid_pred = self.val_model.predict(self.x_train)
        else:
            if self.problem == 'classification':
                y_train_valid_pred = self.val_model.predict_proba(self.x_train)
            elif self.problem == 'regression':
                y_train_valid_pred = self.val_model.predict(self.x_train)
                y_train_valid_pred = np.reshape(y_train_valid_pred, [-1, 1])

        if self.problem == 'classification':
            y_pred_diff = np.abs(self.y_train_onehot - y_train_valid_pred)
        elif self.problem == 'regression':
            y_pred_diff = \
                np.abs(self.y_train_onehot - y_train_valid_pred)/self.y_train_onehot

        # Main session
        sess = tf.Session()
        sess.run(tf.global_variables_initializer())

        # Model save at the end
        saver = tf.train.Saver(dve_vars)

        for _ in tqdm.tqdm(range(self.outer_iterations)):

            # Batch selection
            batch_idx = \
                np.random.permutation(len(self.x_train[:, 0]))[:self.batch_size]

            x_batch = self.x_train[batch_idx, :]
            y_batch_onehot = self.y_train_onehot[batch_idx]
            y_batch = self.y_train[batch_idx]
            y_hat_batch = y_pred_diff[batch_idx]

            # Generates selection probability
            est_dv_curr = sess.run(est_data_value,
                                   feed_dict={
                                       self.x_input: x_batch,
                                       self.y_input: y_batch_onehot,
                                       self.y_hat_input: y_hat_batch
                                   })

            # Samples the selection probability
            sel_prob_curr = np.random.binomial(1, est_dv_curr,
                                               est_dv_curr.shape)

            # Exception handling: when no sample is selected, reset the
            # probabilities to 0.5 and resample.
            if np.sum(sel_prob_curr) == 0:
                est_dv_curr = 0.5 * np.ones(np.shape(est_dv_curr))
                sel_prob_curr = np.random.binomial(1, est_dv_curr,
                                                   est_dv_curr.shape)

            # Trains predictor
            # If the predictor is neural network
            if 'summary' in dir(self.pred_model):

                new_model = self.pred_model
                new_model.load_weights('tmp/pred_model.h5')

                # Train the model
                new_model.fit(x_batch,
                              y_batch_onehot,
                              sample_weight=sel_prob_curr[:, 0],
                              batch_size=self.batch_size_predictor,
                              epochs=self.inner_iterations,
                              verbose=False)

                y_valid_hat = new_model.predict(self.x_valid)

            else:
                new_model = self.pred_model
                new_model.fit(x_batch, y_batch, sel_prob_curr[:, 0])

            # Prediction
            if 'summary' in dir(new_model):
                y_valid_hat = new_model.predict(self.x_valid)
            else:
                if self.problem == 'classification':
                    y_valid_hat = new_model.predict_proba(self.x_valid)
                elif self.problem == 'regression':
                    y_valid_hat = new_model.predict(self.x_valid)

            # Reward computation
            if perf_metric == 'auc':
                dvrl_perf = metrics.roc_auc_score(self.y_valid,
                                                  y_valid_hat[:, 1])
            elif perf_metric == 'accuracy':
                dvrl_perf = metrics.accuracy_score(
                    self.y_valid, np.argmax(y_valid_hat, axis=1))
            elif perf_metric == 'log_loss':
                dvrl_perf = -metrics.log_loss(self.y_valid, y_valid_hat)
            elif perf_metric == 'rmspe':
                dvrl_perf = dvrl_metrics.rmspe(self.y_valid, y_valid_hat)
            elif perf_metric == 'mae':
                dvrl_perf = metrics.mean_absolute_error(
                    self.y_valid, y_valid_hat)
            elif perf_metric == 'mse':
                dvrl_perf = metrics.mean_squared_error(self.y_valid,
                                                       y_valid_hat)

            if self.problem == 'classification':
                reward_curr = dvrl_perf - valid_perf
            elif self.problem == 'regression':
                reward_curr = valid_perf - dvrl_perf

            # Trains the generator
            _, _ = sess.run(
                [dve_solver, dve_loss],
                feed_dict={
                    self.x_input: x_batch,
                    self.y_input: y_batch_onehot,
                    self.y_hat_input: y_hat_batch,
                    self.s_input: sel_prob_curr,
                    self.reward_input: reward_curr
                })

        # Saves trained model
        saver.save(sess, self.checkpoint_file_name)

        # Trains DVRL predictor
        # Generate data values
        final_data_value = sess.run(est_data_value,
                                    feed_dict={
                                        self.x_input: self.x_train,
                                        self.y_input: self.y_train_onehot,
                                        self.y_hat_input: y_pred_diff
                                    })[:, 0]

        # Trains final model
        # If the final model is neural network
        if 'summary' in dir(self.pred_model):
            self.final_model.load_weights('tmp/pred_model.h5')
            # Train the model
            self.final_model.fit(self.x_train,
                                 self.y_train_onehot,
                                 sample_weight=final_data_value,
                                 batch_size=self.batch_size_predictor,
                                 epochs=self.inner_iterations,
                                 verbose=False)
        else:
            self.final_model.fit(self.x_train, self.y_train, final_data_value)
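For reference, a NumPy sketch (editor's addition, hypothetical names) of the REINFORCE objective built above: the log-probability of the Bernoulli selection vector, scaled by the negative reward, plus the exploration penalty that keeps the mean selection probability inside [1 - threshold, threshold]:

import numpy as np

def dve_loss_np(p, s, reward, threshold=0.9, eps=1e-8):
    # log Pr(s | p) for s ~ Bernoulli(p), summed over samples
    log_prob = np.sum(s * np.log(p + eps) + (1 - s) * np.log(1 - p + eps))
    penalty = (max(p.mean() - threshold, 0.0) +
               max((1 - threshold) - p.mean(), 0.0))
    return -reward * log_prob + 1e3 * penalty

p = np.array([0.8, 0.3, 0.6])
s = np.random.binomial(1, p)
print(dve_loss_np(p, s, reward=0.05))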
Example #4
def _leaky_relu(x):
    return tf.maximum(0.2 * x, x)
Example #5
    def _update_critic_td3(self, obs, action, next_obs, reward, mask):
        """Updates parameters of td3 critic given samples from the batch.

    Args:
       obs: A tfe.Variable with a batch of observations.
       action: A tfe.Variable with a batch of actions.
       next_obs: A tfe.Variable with a batch of next observations.
       reward: A tfe.Variable with a batch of rewards.
       mask: A tfe.Variable with a batch of masks.
    """
        # Avoid using tensorflow random functions since it's impossible to get
        # the state of the random number generator used by TensorFlow.
        target_action_noise = np.random.normal(
            size=action.get_shape(), scale=self.policy_noise).astype('float32')
        target_action_noise = contrib_eager_python_tfe.Variable(
            target_action_noise)

        target_action_noise = tf.clip_by_value(target_action_noise,
                                               -self.policy_noise_clip,
                                               self.policy_noise_clip)

        noisy_action_targets = self.actor_target(
            next_obs) + target_action_noise

        clipped_noisy_action_targets = tf.clip_by_value(
            noisy_action_targets, -1, 1)

        if self.use_absorbing_state:
            # Starting from the goal state we can execute only non-actions.
            a_mask = tf.maximum(0, mask)
            q_next1, q_next2 = self.critic_target(
                next_obs, clipped_noisy_action_targets * a_mask)
            q_next = tf.reduce_min(tf.concat([q_next1, q_next2], -1),
                                   -1,
                                   keepdims=True)
            q_target = reward + self.discount * q_next
        else:
            q_next1, q_next2 = self.critic_target(
                next_obs, clipped_noisy_action_targets)
            q_next = tf.reduce_min(tf.concat([q_next1, q_next2], -1),
                                   -1,
                                   keepdims=True)
            q_target = reward + self.discount * mask * q_next

        with tf.GradientTape() as tape:
            q_pred1, q_pred2 = self.critic(obs, action)
            critic_loss = tf.losses.mean_squared_error(
                q_target, q_pred1) + tf.losses.mean_squared_error(
                    q_target, q_pred2)

        grads = tape.gradient(critic_loss, self.critic.variables)
        self.critic_optimizer.apply_gradients(zip(grads,
                                                  self.critic.variables),
                                              global_step=self.critic_step)

        if self.use_absorbing_state:
            with contrib_summary.record_summaries_every_n_global_steps(
                    100, self.critic_step):
                a_mask = tf.maximum(0, -mask)
                if tf.reduce_sum(a_mask).numpy() > 0:
                    contrib_summary.scalar('critic/absorbing_reward',
                                           tf.reduce_sum(reward * a_mask) /
                                           tf.reduce_sum(a_mask),
                                           step=self.critic_step)

        with contrib_summary.record_summaries_every_n_global_steps(
                100, self.critic_step):
            contrib_summary.scalar('critic/loss',
                                   critic_loss,
                                   step=self.critic_step)
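A NumPy sketch (editor's addition) of the clipped double-Q target computed above: the minimum of the twin target critics, masked at terminal transitions:

import numpy as np

q1 = np.array([1.0, 2.0, 0.5])      # critic_target head 1
q2 = np.array([1.2, 1.8, 0.7])      # critic_target head 2
reward = np.array([0.1, 0.0, 1.0])
mask = np.array([1.0, 1.0, 0.0])    # 0 at terminal transitions
discount = 0.99
q_target = reward + discount * mask * np.minimum(q1, q2)
print(q_target)  # [1.09, 1.782, 1.0]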
Example #6
def leaky_relu(input_, **kwargs):
    if input_.dtype in [tf.complex64, tf.complex128]:
        raise TypeError('leaky-relu currently does not support complex input')
    leak = kwargs.get('leak', 0.1)
    return tf.maximum(input_, input_ * leak, name='lrelu')
Example #7
def mask(config: configure_pretraining.PretrainingConfig,
         inputs: pretrain_data.Inputs, mask_prob, proposal_distribution=1.0,
         disallow_from_mask=None, already_masked=None):
    """Implementation of dynamic masking. The optional arguments aren't needed for
    BERT/ELECTRA and are from early experiments in "strategically" masking out
    tokens instead of uniformly at random.

    Args:
      config: configure_pretraining.PretrainingConfig
      inputs: pretrain_data.Inputs containing input input_ids/input_mask
      mask_prob: fraction of tokens to mask out
      proposal_distribution: for non-uniform masking can be a [B, L] tensor
                             of scores for masking each position.
      disallow_from_mask: a boolean tensor of [B, L] of positions that should
                          not be masked out
      already_masked: a boolean tensor of [B, N] of already masked-out tokens
                      for multiple rounds of masking
    Returns: a pretrain_data.Inputs with masking added
    """
    # Get the batch size, sequence length, and max masked-out tokens
    N = config.max_predictions_per_seq
    B, L = modeling.get_shape_list(inputs.input_ids)

    # Find indices where masking out a token is allowed
    vocab = tokenization.FullTokenizer(
        config.vocab_file, config.model_sentencepiece_path, do_lower_case=config.do_lower_case).vocab
    candidates_mask = _get_candidates_mask(inputs, vocab, disallow_from_mask)

    # Set the number of tokens to mask out per example
    num_tokens = tf.cast(tf.reduce_sum(inputs.input_mask, -1), tf.float32)
    num_to_predict = tf.maximum(1, tf.minimum(
        N, tf.cast(tf.round(num_tokens * mask_prob), tf.int32)))
    masked_lm_weights = tf.cast(
        tf.sequence_mask(num_to_predict, N), tf.float32)
    if already_masked is not None:
        masked_lm_weights *= (1 - already_masked)

    # Get a probability of masking each position in the sequence
    candidate_mask_float = tf.cast(candidates_mask, tf.float32)
    sample_prob = (proposal_distribution * candidate_mask_float)
    sample_prob /= tf.reduce_sum(sample_prob, axis=-1, keepdims=True)

    # Sample the positions to mask out
    sample_prob = tf.stop_gradient(sample_prob)
    sample_logits = tf.log(sample_prob)
    masked_lm_positions = tf.random.categorical(
        sample_logits, N, dtype=tf.int32)
    masked_lm_positions *= tf.cast(masked_lm_weights, tf.int32)

    # Get the ids of the masked-out tokens
    shift = tf.expand_dims(L * tf.range(B), -1)
    flat_positions = tf.reshape(masked_lm_positions + shift, [-1, 1])
    masked_lm_ids = tf.gather_nd(tf.reshape(inputs.input_ids, [-1]),
                                 flat_positions)
    masked_lm_ids = tf.reshape(masked_lm_ids, [B, -1])
    masked_lm_ids *= tf.cast(masked_lm_weights, tf.int32)

    # Update the input ids
    replace_with_mask_positions = masked_lm_positions * tf.cast(
        tf.less(tf.random.uniform([B, N]), 0.85), tf.int32)
    inputs_ids, _ = scatter_update(
        inputs.input_ids, tf.fill([B, N], vocab["[MASK]"]),
        replace_with_mask_positions)

    return pretrain_data.get_updated_inputs(
        inputs,
        input_ids=tf.stop_gradient(inputs_ids),
        masked_lm_positions=masked_lm_positions,
        masked_lm_ids=masked_lm_ids,
        masked_lm_weights=masked_lm_weights
    )
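The core sampling step above draws N positions (with replacement) from the normalized proposal distribution via `tf.random.categorical`. A NumPy sketch of the same idea (editor's addition, illustrative scores only):

import numpy as np

rng = np.random.default_rng(0)
scores = np.array([1.0, 1.0, 4.0, 1.0, 1.0])         # [L] proposal scores
prob = scores / scores.sum()
positions = rng.choice(len(scores), size=3, p=prob)  # N=3 draws, with replacement
print(positions)  # position 2 is sampled most often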
Example #8
def compute_mel_filterbank_features(waveforms,
                                    sample_rate=16000,
                                    dither=1.0 / np.iinfo(np.int16).max,
                                    preemphasis=0.97,
                                    frame_length=25,
                                    frame_step=10,
                                    fft_length=None,
                                    window_fn=functools.partial(
                                        tf.signal.hann_window, periodic=True),
                                    lower_edge_hertz=80.0,
                                    upper_edge_hertz=7600.0,
                                    num_mel_bins=80,
                                    log_noise_floor=1e-3,
                                    apply_mask=True):
    """Implement mel-filterbank extraction using tf ops.

  Args:
    waveforms: float32 tensor with shape [batch_size, max_len]
    sample_rate: sampling rate of the waveform
    dither: stddev of Gaussian noise added to waveform to prevent quantization
      artefacts
    preemphasis: waveform high-pass filtering constant
    frame_length: frame length in ms
    frame_step: frame step in ms
    fft_length: number of fft bins
    window_fn: windowing function
    lower_edge_hertz: lowest frequency of the filterbank
    upper_edge_hertz: highest frequency of the filterbank
    num_mel_bins: filterbank size
    log_noise_floor: clip small values to prevent numeric overflow in log
    apply_mask: When working on a batch of samples, set padding frames to zero
  Returns:
    filterbanks: a float32 tensor with shape [batch_size, len, num_bins, 1]
  """
    # `stfts` is a complex64 Tensor representing the short-time Fourier
    # Transform of each signal in `signals`. Its shape is
    # [batch_size, ?, fft_unique_bins]
    # where fft_unique_bins = fft_length // 2 + 1

    # Find the waveform length: the largest index for which the value is != 0.
    # Note that waveform samples that are exactly 0.0 are quite common, so
    # simply doing sum(waveforms != 0, axis=-1) will not work correctly.
    wav_lens = tf.reduce_max(
        tf.expand_dims(tf.range(tf.shape(waveforms)[1]), 0) *
        tf.to_int32(tf.not_equal(waveforms, 0.0)),
        axis=-1) + 1
    if dither > 0:
        waveforms += tf.random_normal(tf.shape(waveforms), stddev=dither)
    if preemphasis > 0:
        waveforms = waveforms[:, 1:] - preemphasis * waveforms[:, :-1]
        wav_lens -= 1
    frame_length = int(frame_length * sample_rate / 1e3)
    frame_step = int(frame_step * sample_rate / 1e3)
    if fft_length is None:
        fft_length = int(2**(np.ceil(np.log2(frame_length))))

    stfts = tf.signal.stft(waveforms,
                           frame_length=frame_length,
                           frame_step=frame_step,
                           fft_length=fft_length,
                           window_fn=window_fn,
                           pad_end=True)

    stft_lens = (wav_lens + (frame_step - 1)) // frame_step
    masks = tf.to_float(
        tf.less_equal(tf.expand_dims(tf.range(tf.shape(stfts)[1]), 0),
                      tf.expand_dims(stft_lens, 1)))

    # An energy spectrogram is the magnitude of the complex-valued STFT.
    # A float32 Tensor of shape [batch_size, ?, 257].
    magnitude_spectrograms = tf.abs(stfts)

    # Warp the linear-scale, magnitude spectrograms into the mel-scale.
    num_spectrogram_bins = magnitude_spectrograms.shape[-1].value
    linear_to_mel_weight_matrix = (tf.signal.linear_to_mel_weight_matrix(
        num_mel_bins, num_spectrogram_bins, sample_rate, lower_edge_hertz,
        upper_edge_hertz))
    mel_spectrograms = tf.tensordot(magnitude_spectrograms,
                                    linear_to_mel_weight_matrix, 1)
    # Note: Shape inference for tensordot does not currently handle this case.
    mel_spectrograms.set_shape(magnitude_spectrograms.shape[:-1].concatenate(
        linear_to_mel_weight_matrix.shape[-1:]))

    log_mel_sgram = tf.log(tf.maximum(log_noise_floor, mel_spectrograms))

    if apply_mask:
        log_mel_sgram *= tf.expand_dims(tf.to_float(masks), -1)

    return tf.expand_dims(log_mel_sgram, -1, name="mel_sgrams")
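A hypothetical usage sketch (editor's addition), assuming TF1 graph mode and the `numpy`/`functools` imports the snippet relies on; one second of 16 kHz audio with the default 10 ms frame step yields roughly 100 frames:

import numpy as np
import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()

waveforms = tf.placeholder(tf.float32, [None, None])
feats = compute_mel_filterbank_features(waveforms)
with tf.Session() as sess:
    out = sess.run(feats, {waveforms: np.random.randn(2, 16000)})
print(out.shape)  # (2, ~100, 80, 1)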
Example #9
def make_graph(ops, op_types, interpreter):

    height = 144
    width = 256

    tensors = {}
    input_details = interpreter.get_input_details()
    # output_details = interpreter.get_output_details()
    print(input_details)
    for input_detail in input_details:
        tensors[input_detail['index']] = tf.placeholder(
            dtype=input_detail['dtype'],
            shape=input_detail['shape'],
            name=input_detail['name'])

    for op in ops:
        print('@@@@@@@@@@@@@@ op:', op)
        op_type = op_types[op['opcode_index']]
        if op_type == 'CONV_2D':
            input_tensor = tensors[op['inputs'][0]]
            weights = tensors[op['inputs'][1]].transpose(1, 2, 3, 0)
            bias = tensors[op['inputs'][2]]
            output_detail = interpreter._get_tensor_details(op['outputs'][0])
            options = op['builtin_options']
            output_tensor = tf.nn.conv2d(
                input_tensor,
                weights,
                strides=[1, options['stride_h'], options['stride_w'], 1],
                padding=options['padding'],
                dilations=[
                    1, options['dilation_h_factor'],
                    options['dilation_w_factor'], 1
                ],
                name=output_detail['name'] + '/conv2d')
            output_tensor = tf.add(output_tensor,
                                   bias,
                                   name=output_detail['name'])

            if output_detail['name'].split('/')[-1] == 'Relu6':
                output_tensor = tf.nn.relu6(output_tensor)

            tensors[output_detail['index']] = output_tensor
        elif op_type == 'DEPTHWISE_CONV_2D':
            output_detail = interpreter._get_tensor_details(op['outputs'][0])
            input_tensor = tensors[op['inputs'][0]]
            weights = tensors[op['inputs'][1]].transpose(1, 2, 3, 0)
            bias = tensors[op['inputs'][2]]
            options = op['builtin_options']
            output_tensor = tf.nn.depthwise_conv2d(
                input_tensor,
                weights,
                strides=[1, options['stride_h'], options['stride_w'], 1],
                padding=options['padding'],
                # dilations=[1, options['dilation_h_factor'], options['dilation_w_factor'], 1],
                name=output_detail['name'] + '/depthwise_conv2d')
            output_tensor = tf.add(output_tensor,
                                   bias,
                                   name=output_detail['name'])
            tensors[output_detail['index']] = output_tensor
        elif op_type == 'MAX_POOL_2D':
            input_tensor = tensors[op['inputs'][0]]
            output_detail = interpreter._get_tensor_details(op['outputs'][0])
            options = op['builtin_options']
            output_tensor = tf.nn.max_pool(
                input_tensor,
                ksize=[
                    1, options['filter_height'], options['filter_width'], 1
                ],
                strides=[1, options['stride_h'], options['stride_w'], 1],
                padding=options['padding'],
                name=output_detail['name'])
            tensors[output_detail['index']] = output_tensor
        elif op_type == 'PAD':
            input_tensor = tensors[op['inputs'][0]]
            output_detail = interpreter._get_tensor_details(op['outputs'][0])
            paddings_detail = interpreter._get_tensor_details(op['inputs'][1])
            paddings_array = interpreter.get_tensor(paddings_detail['index'])
            paddings = tf.Variable(paddings_array,
                                   name=paddings_detail['name'])
            output_tensor = tf.pad(input_tensor,
                                   paddings,
                                   name=output_detail['name'])
            tensors[output_detail['index']] = output_tensor
        elif op_type == 'RELU':
            output_detail = interpreter._get_tensor_details(op['outputs'][0])
            input_tensor = tensors[op['inputs'][0]]
            output_tensor = tf.nn.relu(input_tensor,
                                       name=output_detail['name'])
            tensors[output_detail['index']] = output_tensor
        elif op_type == 'PRELU':
            output_detail = interpreter._get_tensor_details(op['outputs'][0])
            input_tensor = tensors[op['inputs'][0]]
            alpha_detail = interpreter._get_tensor_details(op['inputs'][1])
            alpha_array = interpreter.get_tensor(alpha_detail['index'])
            with tf.variable_scope(name_or_scope=output_detail['name']):
                alphas = tf.Variable(alpha_array, name=alpha_detail['name'])
                output_tensor = tf.maximum(alphas * input_tensor, input_tensor)
            tensors[output_detail['index']] = output_tensor
        elif op_type == 'RELU6':
            output_detail = interpreter._get_tensor_details(op['outputs'][0])
            input_tensor = tensors[op['inputs'][0]]
            output_tensor = tf.nn.relu6(input_tensor,
                                        name=output_detail['name'])
            tensors[output_detail['index']] = output_tensor
        elif op_type == 'RESHAPE':
            input_tensor = tensors[op['inputs'][0]]
            output_detail = interpreter._get_tensor_details(op['outputs'][0])
            options = op['builtin_options']
            output_tensor = tf.reshape(input_tensor,
                                       options['new_shape'],
                                       name=output_detail['name'])
            tensors[output_detail['index']] = output_tensor
        elif op_type == 'ADD':
            output_detail = interpreter._get_tensor_details(op['outputs'][0])
            input_tensor_0 = tensors[op['inputs'][0]]
            try:
                input_tensor_1 = tensors[op['inputs'][1]]
            except KeyError:
                param = interpreter._get_tensor_details(op['inputs'][1])
                input_tensor_1 = interpreter.get_tensor(param['index'])
            output_tensor = tf.add(input_tensor_0,
                                   input_tensor_1,
                                   name=output_detail['name'])

            if output_detail['name'].split('/')[-1] == 'Relu6':
                output_tensor = tf.nn.relu6(output_tensor)

            tensors[output_detail['index']] = output_tensor
        elif op_type == 'CONCATENATION':
            output_detail = interpreter._get_tensor_details(op['outputs'][0])
            input_tensor_0 = tensors[op['inputs'][0]]
            input_tensor_1 = tensors[op['inputs'][1]]
            try:
                input_tensor_2 = tensors[op['inputs'][2]]
                options = op['builtin_options']
                output_tensor = tf.concat(
                    [input_tensor_0, input_tensor_1, input_tensor_2],
                    options['axis'],
                    name=output_detail['name'])
            except (KeyError, IndexError):
                options = op['builtin_options']
                output_tensor = tf.concat([input_tensor_0, input_tensor_1],
                                          options['axis'],
                                          name=output_detail['name'])
            tensors[output_detail['index']] = output_tensor
        elif op_type == 'LOGISTIC':
            output_detail = interpreter._get_tensor_details(op['outputs'][0])
            input_tensor = tensors[op['inputs'][0]]
            output_tensor = tf.math.sigmoid(input_tensor,
                                            name=output_detail['name'])
            tensors[output_detail['index']] = output_tensor
        elif op_type == 'TRANSPOSE_CONV':
            input_tensor = tensors[op['inputs'][2]]
            weights_detail = interpreter._get_tensor_details(op['inputs'][1])
            output_shape_detail = interpreter._get_tensor_details(
                op['inputs'][0])
            output_detail = interpreter._get_tensor_details(op['outputs'][0])
            weights_array = interpreter.get_tensor(weights_detail['index'])
            weights_array = np.transpose(weights_array, (1, 2, 0, 3))
            output_shape_array = interpreter.get_tensor(
                output_shape_detail['index'])
            weights = tf.Variable(weights_array, name=weights_detail['name'])
            shape = tf.Variable(output_shape_array,
                                name=output_shape_detail['name'])
            options = op['builtin_options']
            output_tensor = tf.nn.conv2d_transpose(
                input_tensor,
                weights,
                shape, [1, options['stride_h'], options['stride_w'], 1],
                padding=options['padding'],
                name=output_detail['name'] + '/conv2d_transpose')
            tensors[output_detail['index']] = output_tensor
        elif op_type == 'MUL':
            output_detail = interpreter._get_tensor_details(op['outputs'][0])
            input_tensor_0 = tensors[op['inputs'][0]]
            input_tensor_1 = None
            try:
                input_tensor_1 = tensors[op['inputs'][1]]
            except KeyError:
                param = interpreter._get_tensor_details(op['inputs'][1])
                input_tensor_1 = interpreter.get_tensor(param['index'])
            output_tensor = tf.multiply(input_tensor_0,
                                        input_tensor_1,
                                        name=output_detail['name'])
            tensors[output_detail['index']] = output_tensor
        elif op_type == 'HARD_SWISH':
            output_detail = interpreter._get_tensor_details(op['outputs'][0])
            input_tensor = tensors[op['inputs'][0]]
            output_tensor = optimizing_hardswish_for_edgetpu(
                input_tensor, name=output_detail['name'])
            tensors[output_detail['index']] = output_tensor
        elif op_type == 'AVERAGE_POOL_2D':
            output_detail = interpreter._get_tensor_details(op['outputs'][0])
            input_tensor = tensors[op['inputs'][0]]
            options = op['builtin_options']
            pool_size = [options['filter_height'], options['filter_width']]
            strides = [options['stride_h'], options['stride_w']]
            padding = options['padding']
            output_tensor = tf.keras.layers.AveragePooling2D(
                pool_size=pool_size,
                strides=strides,
                padding=padding,
                name=output_detail['name'])(input_tensor)
            tensors[output_detail['index']] = output_tensor
        elif op_type == 'FULLY_CONNECTED':
            output_detail = interpreter._get_tensor_details(op['outputs'][0])
            input_tensor = tensors[op['inputs'][0]]
            weights = tensors[op['inputs'][1]].transpose(1, 0)
            bias = tensors[op['inputs'][2]]
            output_shape_detail = interpreter._get_tensor_details(
                op['inputs'][0])
            output_shape_array = interpreter.get_tensor(
                output_shape_detail['index'])

            output_tensor = tf.keras.layers.Dense(
                units=output_shape_array.shape[3],
                use_bias=True,
                kernel_initializer=tf.keras.initializers.Constant(weights),
                bias_initializer=tf.keras.initializers.Constant(bias))(
                    input_tensor)
            tensors[output_detail['index']] = output_tensor
        elif op_type == 'RESIZE_BILINEAR':
            output_detail = interpreter._get_tensor_details(op['outputs'][0])
            input_tensor = tensors[op['inputs'][0]]
            size_detail = interpreter._get_tensor_details(op['inputs'][1])
            size = interpreter.get_tensor(size_detail['index'])
            size_height = size[0]
            size_width = size[1]

            def upsampling2d_bilinear(x, size_height, size_width):
                if optimizing_for_edgetpu_flg:
                    return tf.image.resize_bilinear(x,
                                                    (size_height, size_width))
                else:
                    return tfv2.image.resize(x, [size_height, size_width],
                                             method='bilinear')

            output_tensor = tf.keras.layers.Lambda(upsampling2d_bilinear,
                                                   arguments={
                                                       'size_height':
                                                       size_height,
                                                       'size_width': size_width
                                                   })(input_tensor)
            tensors[output_detail['index']] = output_tensor
        elif op_type == 'DEQUANTIZE':
            output_detail = interpreter._get_tensor_details(op['outputs'][0])
            weights_detail = interpreter._get_tensor_details(op['inputs'][0])
            weights = interpreter.get_tensor(weights_detail['index'])
            output_tensor = weights.astype(np.float32)
            tensors[output_detail['index']] = output_tensor
        else:
            raise ValueError(op_type)

    # Convolution2DTransposeBias
    input_tensor = tensors[241]
    weights = np.load('weights/segment_Kernel').transpose(1, 2, 0,
                                                          3).astype(np.float32)
    bias = np.load('weights/segment_Bias').astype(np.float32)
    custom_trans = tf.nn.conv2d_transpose(input=input_tensor,
                                          filters=weights,
                                          output_shape=[1, height, width, 2],
                                          strides=[2, 2],
                                          padding='SAME',
                                          dilations=[1, 1])
    output_tensor = tf.math.add(custom_trans, bias, name='segment')
    tensors[999] = output_tensor
Example #10
        def _body(i, posterior, center, wx, activation_biases, sigma_biases,
                  input_activation, tile_filter):
            """Body of EM while loop."""
            tf.logging.info('  Wx: %s', wx)

            beta = final_beta * (1 - tf.pow(0.95, tf.cast(i + 1, tf.float32)))

            posterior = tf.Print(posterior, [
                layer_name, i, h, ih,
                tf.reduce_min(posterior),
                tf.reduce_max(posterior)
            ],
                                 message='posterior')
            # route: [outdim, height?, width?, batch, indim]
            with tf.name_scope('vote_conf'):
                vote_conf = posterior * input_activation
                vote_conf = tf.maximum(vote_conf, 0.0)

            # masses: [batch, 1, outdim, 1, height, width, 1, 1]
            with tf.name_scope('masses'):
                masses = tf.reduce_sum(vote_conf,
                                       axis=[1, -1, -2],
                                       keepdims=True,
                                       name='masses_calculation') + 0.0000001
            with tf.name_scope('preactivate_unrolled'):
                preactivate_unrolled = vote_conf * wx

            # center: [batch, 1, outdim, outatom, height, width]
            with tf.name_scope('center'):
                center = .9 * tf.reduce_sum(
                    preactivate_unrolled, axis=[1, -1, -2],
                    keepdims=True) / masses + .1 * center

            # Rematerialization to save GPU memory. (+22ms/-1.6GB)
            # @tf.contrib.layers.recompute_grad
            def compute_noise_and_variance(wx, center, vote_conf, masses):
                noise = tf.squared_difference(wx, center)
                variance = min_var + tf.reduce_sum(
                    vote_conf * noise,
                    axis=[1, -1, -2],
                    keepdims=True,
                    name='variance_calculation') / masses
                return noise, variance

            with tf.name_scope('compute_noise_and_variance'):
                noise, variance = compute_noise_and_variance(
                    wx, center, vote_conf, masses)

            with tf.name_scope('win'):
                log_variance = tf.log(variance)
                p_i = -1 * tf.reduce_sum(log_variance, axis=3, keepdims=True)
                log_2pi = tf.log(2 * math.pi)
                sigma_b = tf.log(sigma_biases * sigma_biases + min_var)
                win = masses * (p_i - num_out_atoms *
                                (sigma_b + log_2pi + 1.0))
            with tf.name_scope('logit'):
                logit = beta * (win - activation_biases * 50 * num_out_atoms)
            with tf.name_scope('activation_update'):
                activation_update = tf.minimum(
                    0.0, logit) - tf.log(1 + tf.exp(-tf.abs(logit)))
            with tf.name_scope('sigma_update'):
                log_det_sigma = -1 * p_i
                sigma_update = (num_out_atoms * log_2pi + log_det_sigma) / 2.0
            with tf.name_scope('exp_update'):
                exp_update = tf.reduce_sum(noise / (2 * variance),
                                           axis=3,
                                           keepdims=True)
            prior_update = tf.subtract(activation_update - sigma_update,
                                       exp_update,
                                       name='prior_update_sub')
            max_prior_update = tf.reduce_max(prior_update,
                                             axis=[2, 3, 4, 5, 6, 7],
                                             keepdims=True,
                                             name='max_prior_update')
            prior_normal = tf.add(prior_update, -1 * max_prior_update)
            prior_exp = tf.exp(prior_normal)
            prior_exp_out = tf.reduce_sum(prior_exp,
                                          axis=2,
                                          keepdims=True,
                                          name='prior_exp_out')
            prior_exp_reshape = tf.reshape(prior_exp_out, [-1, h, h, k * k],
                                           name='prior_exp_reshape')

            sum_prior = tf.nn.conv2d_transpose(prior_exp_reshape,
                                               tile_filter,
                                               output_shape=[b * c, ih, ih, 1],
                                               strides=[1, s, s, 1],
                                               padding='VALID')
            sum_prior = tf.maximum(1e-6, sum_prior)

            sum_prior_patch = utils.kernel_tile(sum_prior,
                                                k,
                                                s,
                                                1,
                                                name='sum_prior_patch')

            with utils.maybe_jit_scope(), tf.name_scope('posterior'):
                sum_prior_reshape = tf.reshape(
                    sum_prior_patch, [-1, input_dim, 1, 1, h, h, k, k])
                posterior = prior_exp / sum_prior_reshape

            return (i + 1, posterior, logit, center, masses)
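The `max_prior_update` subtraction above is the standard log-sum-exp stabilization before exponentiating: shifting by the maximum leaves the normalized posterior unchanged while avoiding overflow. A minimal NumPy illustration (editor's addition):

import numpy as np

logits = np.array([1000.0, 1001.0, 1002.0])   # would overflow np.exp directly
shifted = logits - logits.max()
posterior = np.exp(shifted) / np.exp(shifted).sum()
print(posterior)  # [0.09003057 0.24472847 0.66524096]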
Example #11
    def prepare_model(self):

        # input rating vector
        self.input_R_U = tf.placeholder(dtype=tf.float32,
                                        shape=[None, self.num_cols],
                                        name="input_R_U")
        self.input_R_I = tf.placeholder(dtype=tf.float32,
                                        shape=[self.num_rows, None],
                                        name="input_R_I")
        self.input_OH_I = tf.placeholder(dtype=tf.float32,
                                         shape=[None, self.num_cols],
                                         name="input_OH_I")
        self.input_P_cor = tf.placeholder(dtype=tf.int32,
                                          shape=[None, 2],
                                          name="input_P_cor")
        self.input_N_cor = tf.placeholder(dtype=tf.int32,
                                          shape=[None, 2],
                                          name="input_N_cor")

        # input indicator vector indicator
        self.row_idx = tf.placeholder(dtype=tf.int32,
                                      shape=[None, 1],
                                      name="row_idx")
        self.col_idx = tf.placeholder(dtype=tf.int32,
                                      shape=[None, 1],
                                      name="col_idx")

        # user component
        # first layer weights
        UV = tf.get_variable(name="UV",
                             initializer=tf.truncated_normal(
                                 shape=[self.num_cols, self.U_hidden_neuron],
                                 mean=0,
                                 stddev=0.03),
                             dtype=tf.float32)
        # second layer weights
        UW = tf.get_variable(name="UW",
                             initializer=tf.truncated_normal(
                                 shape=[self.U_hidden_neuron, self.num_cols],
                                 mean=0,
                                 stddev=0.03),
                             dtype=tf.float32)
        # first layer bias
        Ub1 = tf.get_variable(name="Ub1",
                              initializer=tf.truncated_normal(
                                  shape=[1, self.U_hidden_neuron],
                                  mean=0,
                                  stddev=0.03),
                              dtype=tf.float32)
        # second layer bias
        Ub2 = tf.get_variable(name="Ub2",
                              initializer=tf.truncated_normal(
                                  shape=[1, self.num_cols],
                                  mean=0,
                                  stddev=0.03),
                              dtype=tf.float32)

        # item component
        # first layer weights
        IV = tf.get_variable(name="IV",
                             initializer=tf.truncated_normal(
                                 shape=[self.num_rows, self.I_hidden_neuron],
                                 mean=0,
                                 stddev=0.03),
                             dtype=tf.float32)
        # second layer weights
        IW = tf.get_variable(name="IW",
                             initializer=tf.truncated_normal(
                                 shape=[self.I_hidden_neuron, self.num_rows],
                                 mean=0,
                                 stddev=0.03),
                             dtype=tf.float32)
        # first layer bias
        Ib1 = tf.get_variable(name="Ib1",
                              initializer=tf.truncated_normal(
                                  shape=[1, self.I_hidden_neuron],
                                  mean=0,
                                  stddev=0.03),
                              dtype=tf.float32)
        # second layer bias
        Ib2 = tf.get_variable(name="Ib2",
                              initializer=tf.truncated_normal(
                                  shape=[1, self.num_rows],
                                  mean=0,
                                  stddev=0.03),
                              dtype=tf.float32)

        I_factor_vector = tf.get_variable(
            name="I_factor_vector",
            initializer=tf.random_uniform(shape=[1, self.num_cols]),
            dtype=tf.float32)

        # user component
        U_pre_Encoder = tf.matmul(self.input_R_U,
                                  UV) + Ub1  # input to the hidden layer
        self.U_Encoder = self.g_act(
            U_pre_Encoder)  # output of the hidden layer
        U_pre_Decoder = tf.matmul(self.U_Encoder,
                                  UW) + Ub2  # input to the output layer
        self.U_Decoder = self.f_act(
            U_pre_Decoder)  # output of the output layer

        # item component
        I_pre_mul = tf.transpose(
            tf.matmul(I_factor_vector, tf.transpose(self.input_OH_I)))
        I_pre_Encoder = tf.matmul(tf.transpose(self.input_R_I),
                                  IV) + Ib1  # input to the hidden layer
        self.I_Encoder = self.g_act(I_pre_Encoder *
                                    I_pre_mul)  # output of the hidden layer
        I_pre_Decoder = tf.matmul(self.I_Encoder,
                                  IW) + Ib2  # input to the output layer
        self.I_Decoder = self.f_act(
            I_pre_Decoder)  # output of the output layer

        # final output
        self.Decoder = (
            (tf.transpose(
                tf.gather_nd(tf.transpose(self.U_Decoder), self.col_idx))) +
            tf.gather_nd(tf.transpose(self.I_Decoder), self.row_idx)) / 2.0

        pos_data = tf.gather_nd(self.Decoder, self.input_P_cor)
        neg_data = tf.gather_nd(self.Decoder, self.input_N_cor)

        pre_cost1 = tf.maximum(neg_data - pos_data + self.margin,
                               tf.zeros(tf.shape(neg_data)[0]))
        cost1 = tf.reduce_sum(pre_cost1)  # pairwise margin (hinge) ranking loss
        pre_cost2 = tf.square(self.l2_norm(UW)) + tf.square(self.l2_norm(UV)) \
                    + tf.square(self.l2_norm(IW)) + tf.square(self.l2_norm(IV))\
                    + tf.square(self.l2_norm(Ib1)) + tf.square(self.l2_norm(Ib2))\
                    + tf.square(self.l2_norm(Ub1)) + tf.square(self.l2_norm(Ub2))
        cost2 = self.lambda_value * 0.5 * pre_cost2  # regularization term

        self.cost = cost1 + cost2  # the loss function

        if self.optimizer_method == "Adam":
            optimizer = tf.train.AdamOptimizer(self.lr)
        elif self.optimizer_method == "Adadelta":
            optimizer = tf.train.AdadeltaOptimizer(self.lr)
        elif self.optimizer_method == "Adagrad":
            optimizer = tf.train.AdadeltaOptimizer(self.lr)
        elif self.optimizer_method == "RMSProp":
            optimizer = tf.train.RMSPropOptimizer(self.lr)
        elif self.optimizer_method == "GradientDescent":
            optimizer = tf.train.GradientDescentOptimizer(self.lr)
        elif self.optimizer_method == "Momentum":
            optimizer = tf.train.MomentumOptimizer(self.lr, 0.9)
        else:
            raise ValueError("Optimizer Key ERROR")

        gvs = optimizer.compute_gradients(self.cost)
        self.optimizer = optimizer.apply_gradients(
            gvs, global_step=self.global_step)
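A NumPy sketch (editor's addition) of the pairwise margin (hinge) ranking loss assembled above in `pre_cost1`/`cost1`:

import numpy as np

pos = np.array([0.9, 0.7])     # scores at positive coordinates
neg = np.array([0.4, 0.8])     # scores at negative coordinates
margin = 0.5
print(np.maximum(neg - pos + margin, 0.0).sum())  # 0.0 + 0.6 ≈ 0.6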
Example #12
    def __init__(self, environment, summary_dir="./"):
        os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
        config = tf.ConfigProto(log_device_placement=False,
                                device_count={'GPU': 1})
        config.gpu_options.per_process_gpu_memory_fraction = 0.1

        self.state_size = environment.observation_space.shape
        self.action_size = environment.action_space.shape[0]
        self.action_bound_high = environment.action_space.high
        self.action_bound_low = environment.action_space.low
        self.actions = tf.placeholder(tf.float32, [None, self.action_size],
                                      'action')
        self.beta = 0.01
        self.learning_rate = 0.0001
        self.minibatch = 32
        self.epsilon = 0.21
        self.critic_coefficient = 0.5
        self.l2_regular = 0.001

        self.sess = tf.Session(config=config)
        self.state = tf.placeholder(tf.float32, [None, 64, 64, 4], 'state')
        self.advantage = tf.placeholder(tf.float32, [None, 1], 'advantage')
        self.rewards = tf.placeholder(tf.float32, [None, 1], 'd_rewards')

        self.dataset = tf.data.Dataset.from_tensor_slices({
            "state": self.state,
            "actions": self.actions,
            "rewards": self.rewards,
            "advantage": self.advantage
        })
        self.dataset = self.dataset.shuffle(buffer_size=10000)
        self.dataset = self.dataset.batch(self.minibatch)
        self.dataset = self.dataset.cache()
        self.dataset = self.dataset.repeat(4)
        self.iterator = self.dataset.make_initializable_iterator()
        batch = self.iterator.get_next()

        old_policy, old_policy_params = self.Actor(batch["state"], 'oldpolicy')
        policy, policy_params = self.Actor(batch["state"], 'policy')
        policy_eval, _ = self.Actor(self.state, 'policy', reuse=True)

        old_value, old_value_params = self.Critic(batch["state"], "oldvalue")
        self.value, value_params = self.Critic(batch["state"], "value")
        self.value_eval, _ = self.Critic(self.state, 'value', reuse=True)

        self.sample_action = tf.squeeze(policy_eval.sample(1),
                                        axis=0,
                                        name="sample_action")
        self.global_step = tf.train.get_or_create_global_step()
        self.saver = tf.train.Saver()

        with tf.variable_scope('loss'):
            with tf.variable_scope('actor'):
                ratio = tf.maximum(policy.prob(batch["actions"]),
                                   1e-6) / tf.maximum(
                                       old_policy.prob(batch["actions"]), 1e-6)
                ratio = tf.clip_by_value(ratio, 0, 10)
                surr1 = batch["advantage"] * ratio
                surr2 = batch["advantage"] * tf.clip_by_value(
                    ratio, 1 - self.epsilon, 1 + self.epsilon)
                loss_policy = -tf.reduce_mean(tf.minimum(surr1, surr2))
                tf.summary.scalar("loss", loss_policy)

            with tf.variable_scope('critic'):
                loss_critic = tf.reduce_mean(
                    tf.square(self.value - batch["rewards"])) * 0.5
                tf.summary.scalar("loss", loss_critic)

            with tf.variable_scope('entropy'):
                entropy = policy.entropy()
                pol_entpen = -self.beta * tf.reduce_mean(entropy)

            loss = loss_policy + loss_critic * self.critic_coefficient + pol_entpen
            tf.summary.scalar("total", loss)

        with tf.variable_scope('train'):
            opt = tf.train.AdamOptimizer(self.learning_rate)
            self.trainer = opt.minimize(loss,
                                        global_step=self.global_step,
                                        var_list=policy_params + value_params)

        with tf.variable_scope('update_old'):
            self.update_old_policy_op = [
                oldp.assign(p)
                for p, oldp in zip(policy_params, old_policy_params)
            ]
            self.update_old_value_op = [
                oldp.assign(p)
                for p, oldp in zip(value_params, old_value_params)
            ]

        self.writer = tf.summary.FileWriter(summary_dir, self.sess.graph)
        self.sess.run(tf.global_variables_initializer())

        tf.summary.scalar("value", tf.reduce_mean(self.value))
        tf.summary.scalar("policy_entropy", tf.reduce_mean(entropy))
        tf.summary.scalar("sigma", tf.reduce_mean(policy.stddev()))
        self.board = tf.summary.merge(tf.get_collection(
            tf.GraphKeys.SUMMARIES))
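A NumPy sketch (editor's addition) of the PPO clipped surrogate loss built in the 'actor' scope above, with the same epsilon:

import numpy as np

ratio = np.array([0.5, 1.0, 1.5])       # new/old policy probability ratio
advantage = np.array([1.0, -1.0, 2.0])
epsilon = 0.21
clipped = np.clip(ratio, 1 - epsilon, 1 + epsilon)
loss = -np.mean(np.minimum(advantage * ratio, advantage * clipped))
print(loss)  # pessimistic (clipped) surrogate, negated for minimization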
Example #13
def resize_image(image, bboxes=None, min_size=None, max_size=None):
    """
    We need to resize the image and (optionally) the bounding boxes when the
    biggest side is larger than `max_size` or when the smaller side is
    smaller than `min_size`. If no max_size is defined it won't scale down,
    and if no min_size is defined it won't scale up.

    Then, using the same scale ratio, we adjust the bounding boxes
    accordingly.

    Args:
        image: Tensor with image of shape (H, W, 3).
        bboxes: Optional Tensor with bounding boxes with shape (num_bboxes, 5).
            where we have (x_min, y_min, x_max, y_max, label) for each one.
        min_size: Min size of width or height.
        max_size: Max size of width or height.

    Returns:
        Dictionary containing:
            image: Tensor with scaled image.
            bboxes: Tensor with scaled (using the same factor as the image)
                bounding boxes with shape (num_bboxes, 5).
            scale_factor: Scale factor used to modify the image (1.0 means no
                change).
    """
    image_shape = tf.to_float(tf.shape(image))
    height = image_shape[0]
    width = image_shape[1]

    if min_size is not None:
        # We calculate the upscale factor: the ratio needed so that the
        # image's smallest dimension ends up at least `min_size`.
        # If the image is already big enough, the scale factor is 1.0 (no change).
        min_size = tf.to_float(min_size)
        min_dimension = tf.minimum(height, width)
        upscale_factor = tf.maximum(min_size / min_dimension, 1.0)
    else:
        upscale_factor = tf.constant(1.0)

    if max_size is not None:
        # We do the same for the downscale factor, to end up with an image
        # whose biggest dimension is at most `max_size`.
        # If the image is small enough, the scale factor is 1.0 (no change).
        max_size = tf.to_float(max_size)
        max_dimension = tf.maximum(height, width)
        downscale_factor = tf.minimum(max_size / max_dimension, 1.0)
    else:
        downscale_factor = tf.constant(1.0)

    scale_factor = upscale_factor * downscale_factor

    # The new size is calculated using the scale factor, rounded to int.
    new_height = height * scale_factor
    new_width = width * scale_factor

    # Resize the image using TensorFlow's own `resize_images` utility.
    image = tf.image.resize_images(image,
                                   tf.stack(
                                       tf.to_int32([new_height, new_width])),
                                   method=tf.image.ResizeMethod.BILINEAR)

    if bboxes is not None:
        bboxes = adjust_bboxes(bboxes,
                               old_height=height,
                               old_width=width,
                               new_height=new_height,
                               new_width=new_width)
        return {
            "image": image,
            "bboxes": bboxes,
            "scale_factor": scale_factor,
        }

    return {
        "image": image,
        "scale_factor": scale_factor,
    }
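A minimal sketch of the scale-factor arithmetic above, in plain Python (the function name and example values here are illustrative, not part of the snippet):

def compute_scale_factor(height, width, min_size=None, max_size=None):
    # Upscale so the smallest side reaches min_size (never below 1.0).
    upscale = 1.0 if min_size is None else max(min_size / min(height, width), 1.0)
    # Downscale so the biggest side stays within max_size (never above 1.0).
    downscale = 1.0 if max_size is None else min(max_size / max(height, width), 1.0)
    return upscale * downscale

# For a 300x500 image with min_size=400 and max_size=600:
#   upscale   = max(400 / 300, 1.0) = 1.333...
#   downscale = min(600 / 500, 1.0) = 1.0
#   scale_factor = 1.333..., so the new size is roughly 400x667. Note that
#   the long side can still exceed max_size after upscaling, since both
#   factors are computed from the original dimensions.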
Exemplo n.º 14
0
    def call(self, x):
        input_image, y_pred, y_true, true_boxes = x

        # adjust the shape of y_pred to [batch, grid_h, grid_w, 3, 4+1+nb_class]
        y_pred = tf.reshape(
            y_pred,
            tf.concat([tf.shape(input=y_pred)[:3],
                       tf.constant([3, -1])],
                      axis=0))

        # initialize the masks
        object_mask = tf.expand_dims(y_true[..., 4], 4)

        # the variable to keep track of the number of batches processed
        batch_seen = tf.Variable(0.)

        # compute grid factor and net factor
        grid_h = tf.shape(input=y_true)[1]
        grid_w = tf.shape(input=y_true)[2]
        grid_factor = tf.reshape(tf.cast([grid_w, grid_h], tf.float32),
                                 [1, 1, 1, 1, 2])

        net_h = tf.shape(input=input_image)[1]
        net_w = tf.shape(input=input_image)[2]
        net_factor = tf.reshape(tf.cast([net_w, net_h], tf.float32),
                                [1, 1, 1, 1, 2])
        """
        Adjust prediction
        """
        pred_box_xy = (self.cell_grid[:, :grid_h, :grid_w, :, :] +
                       tf.sigmoid(y_pred[..., :2]))  # sigma(t_xy) + c_xy
        pred_box_wh = y_pred[..., 2:4]  # t_wh
        pred_box_conf = tf.expand_dims(tf.sigmoid(y_pred[..., 4]),
                                       4)  # adjust confidence
        pred_box_class = y_pred[..., 5:]  # adjust class probabilities
        """
        Adjust ground truth
        """
        true_box_xy = y_true[..., 0:2]  # (sigma(t_xy) + c_xy)
        true_box_wh = y_true[..., 2:4]  # t_wh
        true_box_conf = tf.expand_dims(y_true[..., 4], 4)
        true_box_class = tf.argmax(input=y_true[..., 5:], axis=-1)
        """
        Compare each predicted box to all true boxes
        """
        # initially, drag all objectness of all boxes to 0
        conf_delta = pred_box_conf - 0

        # then, ignore the boxes which have good overlap with some true box
        true_xy = true_boxes[..., 0:2] / grid_factor
        true_wh = true_boxes[..., 2:4] / net_factor

        true_wh_half = true_wh / 2.
        true_mins = true_xy - true_wh_half
        true_maxes = true_xy + true_wh_half

        pred_xy = tf.expand_dims(pred_box_xy / grid_factor, 4)
        pred_wh = tf.expand_dims(
            tf.exp(pred_box_wh) * self.anchors / net_factor, 4)

        pred_wh_half = pred_wh / 2.
        pred_mins = pred_xy - pred_wh_half
        pred_maxes = pred_xy + pred_wh_half

        intersect_mins = tf.maximum(pred_mins, true_mins)
        intersect_maxes = tf.minimum(pred_maxes, true_maxes)

        intersect_wh = tf.maximum(intersect_maxes - intersect_mins, 0.)
        intersect_areas = intersect_wh[..., 0] * intersect_wh[..., 1]

        true_areas = true_wh[..., 0] * true_wh[..., 1]
        pred_areas = pred_wh[..., 0] * pred_wh[..., 1]

        union_areas = pred_areas + true_areas - intersect_areas
        iou_scores = tf.truediv(intersect_areas, union_areas)

        best_ious = tf.reduce_max(input_tensor=iou_scores, axis=4)
        conf_delta *= tf.expand_dims(
            tf.cast(best_ious < self.ignore_thresh, dtype=tf.float32), 4)
        """
        Compute some online statistics
        """
        true_xy = true_box_xy / grid_factor
        true_wh = tf.exp(true_box_wh) * self.anchors / net_factor

        true_wh_half = true_wh / 2.
        true_mins = true_xy - true_wh_half
        true_maxes = true_xy + true_wh_half

        pred_xy = pred_box_xy / grid_factor
        pred_wh = tf.exp(pred_box_wh) * self.anchors / net_factor

        pred_wh_half = pred_wh / 2.
        pred_mins = pred_xy - pred_wh_half
        pred_maxes = pred_xy + pred_wh_half

        intersect_mins = tf.maximum(pred_mins, true_mins)
        intersect_maxes = tf.minimum(pred_maxes, true_maxes)
        intersect_wh = tf.maximum(intersect_maxes - intersect_mins, 0.)
        intersect_areas = intersect_wh[..., 0] * intersect_wh[..., 1]

        true_areas = true_wh[..., 0] * true_wh[..., 1]
        pred_areas = pred_wh[..., 0] * pred_wh[..., 1]

        union_areas = pred_areas + true_areas - intersect_areas
        iou_scores = tf.truediv(intersect_areas, union_areas)
        iou_scores = object_mask * tf.expand_dims(iou_scores, 4)

        count = tf.reduce_sum(input_tensor=object_mask)
        count_noobj = tf.reduce_sum(input_tensor=1 - object_mask)
        detect_mask = tf.cast((pred_box_conf * object_mask) >= 0.5,
                              dtype=tf.float32)
        class_mask = tf.expand_dims(
            tf.cast(tf.equal(tf.argmax(input=pred_box_class, axis=-1),
                             true_box_class),
                    dtype=tf.float32), 4)
        recall50 = tf.reduce_sum(
            input_tensor=tf.cast(iou_scores >= 0.5, dtype=tf.float32) *
            detect_mask * class_mask) / (count + 1e-3)
        recall75 = tf.reduce_sum(
            input_tensor=tf.cast(iou_scores >= 0.75, dtype=tf.float32) *
            detect_mask * class_mask) / (count + 1e-3)
        avg_iou = tf.reduce_sum(input_tensor=iou_scores) / (count + 1e-3)
        avg_obj = tf.reduce_sum(input_tensor=pred_box_conf *
                                object_mask) / (count + 1e-3)
        avg_noobj = tf.reduce_sum(input_tensor=pred_box_conf *
                                  (1 - object_mask)) / (count_noobj + 1e-3)
        avg_cat = tf.reduce_sum(input_tensor=object_mask *
                                class_mask) / (count + 1e-3)
        """
        Warm-up training
        """
        batch_seen = tf.assign_add(batch_seen, 1.)

        true_box_xy, true_box_wh, xywh_mask = tf.cond(
            pred=tf.less(batch_seen, self.warmup_batches + 1),
            true_fn=lambda: [
                true_box_xy +
                (0.5 + self.cell_grid[:, :grid_h, :grid_w, :, :]) *
                (1 - object_mask), true_box_wh + tf.zeros_like(true_box_wh) *
                (1 - object_mask),
                tf.ones_like(object_mask)
            ],
            false_fn=lambda: [true_box_xy, true_box_wh, object_mask])
        """
        Compare each true box to all anchor boxes
        """
        wh_scale = tf.exp(true_box_wh) * self.anchors / net_factor
        wh_scale = tf.expand_dims(
            2 - wh_scale[..., 0] * wh_scale[..., 1],
            axis=4)  # the smaller the box, the bigger the scale

        xy_delta = xywh_mask * (pred_box_xy -
                                true_box_xy) * wh_scale * self.xywh_scale
        wh_delta = xywh_mask * (pred_box_wh -
                                true_box_wh) * wh_scale * self.xywh_scale
        conf_delta = object_mask * (
            pred_box_conf - true_box_conf) * self.obj_scale + (
                1 - object_mask) * conf_delta * self.noobj_scale
        class_delta = object_mask * \
                      tf.expand_dims(tf.nn.sparse_softmax_cross_entropy_with_logits(labels=true_box_class, logits=pred_box_class), 4) * \
                      self.class_scale

        loss_xy = tf.reduce_sum(input_tensor=tf.square(xy_delta),
                                axis=list(range(1, 5)))
        loss_wh = tf.reduce_sum(input_tensor=tf.square(wh_delta),
                                axis=list(range(1, 5)))
        loss_conf = tf.reduce_sum(input_tensor=tf.square(conf_delta),
                                  axis=list(range(1, 5)))
        loss_class = tf.reduce_sum(input_tensor=class_delta,
                                   axis=list(range(1, 5)))

        loss = loss_xy + loss_wh + loss_conf + loss_class

        loss = tf.Print(loss, [grid_h, avg_obj],
                        message='avg_obj \t\t',
                        summarize=1000)
        loss = tf.Print(loss, [grid_h, avg_noobj],
                        message='avg_noobj \t\t',
                        summarize=1000)
        loss = tf.Print(loss, [grid_h, avg_iou],
                        message='avg_iou \t\t',
                        summarize=1000)
        loss = tf.Print(loss, [grid_h, avg_cat],
                        message='avg_cat \t\t',
                        summarize=1000)
        loss = tf.Print(loss, [grid_h, recall50],
                        message='recall50 \t',
                        summarize=1000)
        loss = tf.Print(loss, [grid_h, recall75],
                        message='recall75 \t',
                        summarize=1000)
        loss = tf.Print(loss, [grid_h, count],
                        message='count \t',
                        summarize=1000)
        loss = tf.Print(loss, [
            grid_h,
            tf.reduce_sum(input_tensor=loss_xy),
            tf.reduce_sum(input_tensor=loss_wh),
            tf.reduce_sum(input_tensor=loss_conf),
            tf.reduce_sum(input_tensor=loss_class)
        ],
                        message='loss xy, wh, conf, class: \t',
                        summarize=1000)

        return loss * self.grid_scale
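The broadcasted IoU computation appears twice in the loss above; here is a self-contained sketch of that pattern (names and shapes are illustrative, assuming boxes given as centers plus sizes):

import tensorflow as tf

def broadcast_iou(pred_xy, pred_wh, true_xy, true_wh):
    # Convert center/size boxes to corner coordinates.
    pred_mins, pred_maxes = pred_xy - pred_wh / 2., pred_xy + pred_wh / 2.
    true_mins, true_maxes = true_xy - true_wh / 2., true_xy + true_wh / 2.
    # Intersection rectangle, clipped at zero when the boxes don't overlap.
    intersect_wh = tf.maximum(
        tf.minimum(pred_maxes, true_maxes) - tf.maximum(pred_mins, true_mins),
        0.)
    intersect = intersect_wh[..., 0] * intersect_wh[..., 1]
    union = (pred_wh[..., 0] * pred_wh[..., 1] +
             true_wh[..., 0] * true_wh[..., 1] - intersect)
    return tf.truediv(intersect, union)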
Exemplo n.º 15
0
  def last_value_quantize(self,
                          inputs,
                          per_channel=False,
                          init_min=-6.0,
                          init_max=6.0,
                          name_prefix='FixedValueQuant',
                          reuse=None,
                          is_training=False,
                          num_bits=8,
                          narrow_range=False,
                          relative_quantile=0,
                          freeze=False,
                          quant_delay=False):
    """Adds a layer that collects quantization ranges as last input ranges.

    LastValueQuantize creates variables called 'min' and 'max', representing the
    interval used for quantization and clamping.

    Args:
      inputs: a tensor containing values to be quantized.
      per_channel: (Optional) a boolean specifying whether to use different
        quantization ranges per output channel.
      init_min: a float scalar, the initial value for variable min.
      init_max: a float scalar, the initial value for variable max.
      name_prefix: name_prefix for created nodes.
      reuse: whether or not the layer and its variables should be reused. To
        be able to reuse them, the layer scope must be given.
      is_training: Whether the op is applied to a training or eval graph.
      num_bits: Number of bits to use for quantization, must be between 2 and 8.
      narrow_range: Whether to use the narrow quantization range
        [1; 2^num_bits - 1] or wide range [0; 2^num_bits - 1].
      relative_quantile: Specify the location of quantization min and max
        parameters. relative_quantile = 0 is equivalent to using the min and
        max of the input; relative_quantile = 1 sets min and max at the
        optimal location assuming the input distribution is uniform. In
        practice, a good value should be in the range [0, 1].
      freeze: If True, the min and max variables are calculated once at the
        beginning of training and then frozen. This is used for quantized
        fine-tuning of a pretrained checkpoint. If False, the min and max are
        calculated and updated every cycle.
      quant_delay: The number of global steps after which the fake
        quantization is turned on. Used for performing fine-tuning
        experiments without starting from a pre-trained checkpoint.
    Returns:
      a tensor containing quantized values.
    """

    with tf.variable_scope(
        None, default_name=name_prefix, values=[inputs], reuse=reuse) as scope:
      scope.set_partitioner(None)
      input_shape = inputs.get_shape()
      input_dim = len(input_shape)
      if per_channel:
        # Only support quantizing 1-, 2- and 4-dimensional tensors.
        assert input_dim in [1, 2, 4]
        min_max_shape = [input_shape[-1]]
      else:
        min_max_shape = []

      min_var = tf.get_variable('min',
                                min_max_shape,
                                tf.float32,
                                initializer=tf.constant_initializer(init_min),
                                trainable=False)
      max_var = tf.get_variable('max',
                                min_max_shape,
                                tf.float32,
                                initializer=tf.constant_initializer(init_max),
                                trainable=False)
      if not is_training:
        return self.delayed_quant(
            inputs,
            min_var,
            max_var,
            per_channel=per_channel,
            num_bits=num_bits,
            narrow_range=narrow_range,
            quant_delay=None)

      if per_channel:
        if input_dim == 2:
          reduce_dims = [0]
        elif input_dim == 4:
          reduce_dims = [0, 1, 2]

      if num_bits >= 4:
        quantile = 0
      else:
        quantile = (1.0 / 2.0**(num_bits + 1.0)) * relative_quantile * 100

      if per_channel:
        if input_dim >= 2:
          batch_min = tfp.stats.percentile(
              inputs, q=quantile, axis=reduce_dims, name='BatchMin')
        else:
          batch_min = inputs
      else:
        batch_min = tfp.stats.percentile(
            inputs, q=quantile, name='BatchMin')

      if per_channel:
        if input_dim >= 2:
          batch_max = tfp.stats.percentile(
              inputs, q=100 - quantile, axis=reduce_dims, name='BatchMax')
        else:
          batch_max = inputs
      else:
        batch_max = tfp.stats.percentile(
            inputs, q=100 - quantile, name='BatchMax')

      # In the wide-range case, stretch the minimum slightly past -max so
      # that zero falls exactly on a quantization level.
      if narrow_range:
        multiplier = 1.0
      else:
        multiplier = 1.0 + 1.0 / (2.0**(num_bits - 1.0) - 1.0)

      batch_abs_max = tf.maximum(tf.abs(batch_min), tf.abs(batch_max))
      batch_adjusted_min = 0 - tf.scalar_mul(multiplier, batch_abs_max)

      batch_abs_max = tf.cast(batch_abs_max, tf.float32)
      batch_adjusted_min = tf.cast(batch_adjusted_min, tf.float32)

      if freeze:
        def make_var_op(var):
          def f():
            return var
          return f

        quant_step = common.CreateOrGetQuantizationStep()
        min_max_assign = tf.less_equal(
            quant_step, 1, name='MinMaxAssign')
        min_value = tf.cond(min_max_assign,
                            make_var_op(batch_adjusted_min),
                            make_var_op(min_var),
                            name='AssignMinCond')
        max_value = tf.cond(min_max_assign,
                            make_var_op(batch_abs_max),
                            make_var_op(max_var),
                            name='AssignMaxCond')
      else:
        min_value = batch_adjusted_min
        max_value = batch_abs_max

      assign_min = tf.assign(min_var, min_value)
      assign_max = tf.assign(max_var, max_value)

      return self.delayed_quant(
          inputs,
          assign_min,
          assign_max,
          per_channel=per_channel,
          num_bits=num_bits,
          narrow_range=narrow_range,
          quant_delay=quant_delay)
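A rough numeric illustration of the quantile rule above (values assumed for the example): for num_bits >= 4 the range is simply the batch min/max (quantile = 0), while for fewer bits outliers are trimmed first.

# num_bits = 2, relative_quantile = 1.0:
#   quantile = (1.0 / 2.0 ** (2 + 1.0)) * 1.0 * 100 = 12.5
# so batch_min/batch_max become the 12.5th and 87.5th percentiles of the
# input, and the symmetric range [-max_abs, max_abs] is built from those
# trimmed statistics instead of the raw extremes.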
Exemplo n.º 16
0
  def _compute_model_loss(
      self, input_sequence, output_sequence, sequence_length, control_sequence):
    """Builds a model with loss for train/eval."""
    hparams = self.hparams
    batch_size = hparams.batch_size

    input_sequence = tf.to_float(input_sequence)
    output_sequence = tf.to_float(output_sequence)

    max_seq_len = tf.minimum(tf.shape(output_sequence)[1], hparams.max_seq_len)

    input_sequence = input_sequence[:, :max_seq_len]

    if control_sequence is not None:
      control_depth = control_sequence.shape[-1]
      control_sequence = tf.to_float(control_sequence)
      control_sequence = control_sequence[:, :max_seq_len]
      # Shouldn't be necessary, but the slice loses shape information when
      # control depth is zero.
      control_sequence.set_shape([batch_size, None, control_depth])

    # The target/expected outputs.
    x_target = output_sequence[:, :max_seq_len]
    # Inputs to be fed to decoder, including zero padding for the initial input.
    x_input = tf.pad(output_sequence[:, :max_seq_len - 1],
                     [(0, 0), (1, 0), (0, 0)])
    x_length = tf.minimum(sequence_length, max_seq_len)

    # Either encode to get `z`, or do unconditional, decoder-only.
    if hparams.z_size:  # vae mode:
      q_z = self.encode(input_sequence, x_length, control_sequence)
      z = q_z.sample()

      # Prior distribution.
      p_z = ds.MultivariateNormalDiag(
          loc=[0.] * hparams.z_size, scale_diag=[1.] * hparams.z_size)

      # KL Divergence (nats)
      kl_div = ds.kl_divergence(q_z, p_z)

      # Concatenate the Z vectors to the inputs at each time step.
    else:  # unconditional, decoder-only generation
      kl_div = tf.zeros([batch_size, 1], dtype=tf.float32)
      z = None

    r_loss, metric_map = self.decoder.reconstruction_loss(
        x_input, x_target, x_length, z, control_sequence)[0:2]

    free_nats = hparams.free_bits * tf.math.log(2.0)
    kl_cost = tf.maximum(kl_div - free_nats, 0)

    beta = ((1.0 - tf.pow(hparams.beta_rate, tf.to_float(self.global_step)))
            * hparams.max_beta)
    self.loss = tf.reduce_mean(r_loss) + beta * tf.reduce_mean(kl_cost)

    scalars_to_summarize = {
        'loss': self.loss,
        'losses/r_loss': r_loss,
        'losses/kl_loss': kl_cost,
        'losses/kl_bits': kl_div / tf.math.log(2.0),
        'losses/kl_beta': beta,
    }
    return metric_map, scalars_to_summarize
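A minimal sketch of the free-bits trick used above (function and argument names are illustrative): the model is allowed `free_bits` bits of KL divergence per example before any penalty applies, and the penalty weight is annealed toward max_beta.

import tensorflow as tf

def kl_cost_with_free_bits(kl_div, free_bits, beta_rate, max_beta, global_step):
    free_nats = free_bits * tf.math.log(2.0)        # convert bits to nats
    kl_cost = tf.maximum(kl_div - free_nats, 0.0)   # no cost under the budget
    beta = (1.0 - tf.pow(beta_rate, tf.to_float(global_step))) * max_beta
    return beta * tf.reduce_mean(kl_cost)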
Exemplo n.º 17
0
 def tensors_to_item(self, keys_to_tensors):
   unmapped_tensor = super(_ClassTensorHandler,
                           self).tensors_to_item(keys_to_tensors)
   return tf.maximum(self._name_to_id_table.lookup(unmapped_tensor),
                     self._display_name_to_id_table.lookup(unmapped_tensor))
Exemplo n.º 18
0
def lrelu(input_, leak=0.2, name="lrelu"):
  return tf.maximum(input_, leak * input_, name=name)
Exemplo n.º 19
0
def _get_final_index(sequence_length, time_major=True):
  indices = [tf.maximum(0, sequence_length - 1),
             tf.range(sequence_length.shape[0])]
  if not time_major:
    indices = indices[-1::-1]
  return tf.stack(indices, axis=1)
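A hedged usage sketch (tensor shapes assumed for illustration): the returned [batch, 2] indices are suitable for tf.gather_nd, picking the last valid step of each sequence from a time-major tensor.

import tensorflow as tf

outputs = tf.random_normal([7, 4, 16])        # [max_time, batch, depth]
lengths = tf.constant([7, 3, 5, 1])           # [batch]
final_index = _get_final_index(lengths, time_major=True)   # [batch, 2]
final_outputs = tf.gather_nd(outputs, final_index)         # [batch, depth]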
Exemplo n.º 20
0
def compute_mask_prob_from_yao_schedule(i, n, pmin=0.1, pmax=0.9, alpha=0.7):
    wat = (pmax - pmin) * i / n
    return tf.maximum(pmin, pmax - wat / alpha)
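With the default arguments (pmin=0.1, pmax=0.9, alpha=0.7) the schedule decays linearly in i and clips at the floor pmin; a quick numeric check:

#   i = 0        -> max(0.1, 0.9 - 0.0)               = 0.9
#   i = 0.7 * n  -> max(0.1, 0.9 - 0.8)               = 0.1  (floor reached)
#   i = n        -> max(0.1, 0.9 - 0.8 / 0.7) ~ max(0.1, -0.243) = 0.1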
Exemplo n.º 21
0
def _legacy_sqrt_decay(step):
  """Decay like 1 / sqrt(step), multiplied by 500 to normalize."""
  return 500.0 / tf.sqrt(tf.maximum(step, 1.0))
Exemplo n.º 22
0
 def _resource_apply_dense(self, grad, handle):
     var = handle
     grad = tf.to_float(grad)
     grad_squared = tf.square(grad) + self._epsilon1
     grad_squared_mean = tf.reduce_mean(grad_squared)
     decay_rate = self._decay_rate
     update_scale = self._learning_rate
     old_val = var
     if var.dtype.base_dtype == tf.bfloat16:
         old_val = tf.to_float(self._parameter_encoding.decode(old_val))
     if self._multiply_by_parameter_scale:
         update_scale *= tf.to_float(self._parameter_scale(old_val))
     # HACK: Make things dependent on grad.
     # This confounds the XLA rewriter and keeps it from fusing computations
     # across different variables.  This fusion is a bad for HBM usage, since
     # it causes the gradients to persist in memory.
     decay_rate += grad_squared_mean * 1e-30
     update_scale += grad_squared_mean * 1e-30
     # END HACK
     mixing_rate = 1.0 - decay_rate
     shape = var.get_shape().as_list()
     updates = []
     if self._should_use_factored_second_moment_estimate(shape):
         grad_squared_row_mean = tf.reduce_mean(grad_squared, -1)
         grad_squared_col_mean = tf.reduce_mean(grad_squared, -2)
         vr = self.get_slot(var, "vr")
         new_vr = (decay_rate * vr + mixing_rate * grad_squared_row_mean)
         vc = self.get_slot(var, "vc")
         new_vc = (decay_rate * vc + mixing_rate * grad_squared_col_mean)
         vr_update = tf.assign(vr, new_vr, use_locking=self._use_locking)
         vc_update = tf.assign(vc, new_vc, use_locking=self._use_locking)
         updates = [vr_update, vc_update]
         long_term_mean = tf.reduce_mean(new_vr, -1, keepdims=True)
         r_factor = tf.rsqrt(new_vr / long_term_mean)
         c_factor = tf.rsqrt(new_vc)
         x = grad * tf.expand_dims(r_factor, -1) * tf.expand_dims(
             c_factor, -2)
     else:
         v = self.get_slot(var, "v")
         new_v = decay_rate * v + mixing_rate * grad_squared
         v_update = tf.assign(v, new_v, use_locking=self._use_locking)
         updates = [v_update]
         x = grad * tf.rsqrt(new_v)
     if self._clipping_threshold is not None:
         clipping_denom = tf.maximum(
             1.0,
             reduce_rms(x) / self._clipping_threshold)
         x /= clipping_denom
     subtrahend = update_scale * x
     if self._beta1:
         m = self.get_slot(var, "m")
         new_m = self._beta1 * tf.to_float(m) + (1.0 -
                                                 self._beta1) * subtrahend
         subtrahend = new_m
         new_m = common_layers.cast_like(new_m, var)
         updates.append(tf.assign(m, new_m, use_locking=self._use_locking))
     new_val = tf.to_float(old_val) - subtrahend
     if var.dtype.base_dtype == tf.bfloat16:
         new_val = self._parameter_encoding.encode(new_val,
                                                   self._quantization_noise)
     if self._simulated_quantize_bits:
         new_val = quantization.simulated_quantize(
             var - subtrahend, self._simulated_quantize_bits,
             self._quantization_noise)
     var_update = tf.assign(var, new_val, use_locking=self._use_locking)
     updates = [var_update] + updates
     return tf.group(*updates)
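The factored second-moment branch above keeps only row and column means of the squared gradients (Adafactor-style) instead of a full accumulator; a minimal sketch of how the normalizer is rebuilt from them (names are illustrative):

import tensorflow as tf

def factored_rsqrt_update(grad, new_vr, new_vc):
    # new_vr: per-row mean of squared grads, new_vc: per-column mean.
    # Their outer product, divided by the overall row mean, is a rank-1
    # surrogate for the full second-moment matrix.
    long_term_mean = tf.reduce_mean(new_vr, -1, keepdims=True)
    r_factor = tf.rsqrt(new_vr / long_term_mean)    # per-row scale
    c_factor = tf.rsqrt(new_vc)                     # per-column scale
    return grad * tf.expand_dims(r_factor, -1) * tf.expand_dims(c_factor, -2)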
Exemplo n.º 23
0
    def __init__(self,
                 sess,
                 model,
                 batch_size=1,
                 confidence=CONFIDENCE,
                 targeted=TARGETED,
                 learning_rate=LEARNING_RATE,
                 binary_search_steps=BINARY_SEARCH_STEPS,
                 max_iterations=MAX_ITERATIONS,
                 print_every=100,
                 early_stop_iters=0,
                 abort_early=ABORT_EARLY,
                 initial_const=INITIAL_CONST,
                 use_log=False,
                 use_tanh=True,
                 use_resize=False,
                 adam_beta1=0.9,
                 adam_beta2=0.999,
                 reset_adam_after_found=False,
                 solver="adam",
                 save_ckpts="",
                 load_checkpoint="",
                 start_iter=0,
                 init_size=32,
                 use_importance=True):
        """
        The L_2 optimized attack. 

        This attack is the most efficient and should be used as the primary 
        attack to evaluate potential defenses.

        Returns adversarial examples for the supplied model.

        confidence: Confidence of adversarial examples: higher produces examples
          that are farther away, but more strongly classified as adversarial.
        batch_size: Number of gradient evaluations to run simultaneously.
        targeted: True if we should perform a targeted attack, False otherwise.
        learning_rate: The learning rate for the attack algorithm. Smaller values
          produce better results but are slower to converge.
        binary_search_steps: The number of times we perform binary search to
          find the optimal tradeoff-constant between distance and confidence. 
        max_iterations: The maximum number of iterations. Larger values are more
          accurate; setting too small will require a large learning rate and will
          produce poor results.
        abort_early: If true, allows early aborts if gradient descent gets stuck.
        initial_const: The initial tradeoff-constant to use to tune the relative
          importance of distance and confidence. If binary_search_steps is large,
          the initial constant is not important.
        """

        image_size, num_channels, num_labels = model.image_size, model.num_channels, model.num_labels
        self.model = model
        self.sess = sess
        self.TARGETED = targeted
        self.LEARNING_RATE = learning_rate
        self.MAX_ITERATIONS = max_iterations
        self.print_every = print_every
        self.early_stop_iters = early_stop_iters if early_stop_iters != 0 else max_iterations // 10
        print("early stop:", self.early_stop_iters)
        self.BINARY_SEARCH_STEPS = binary_search_steps
        self.ABORT_EARLY = abort_early
        self.CONFIDENCE = confidence
        self.initial_const = initial_const
        self.start_iter = start_iter
        self.batch_size = batch_size
        self.num_channels = num_channels
        self.resize_init_size = init_size
        self.use_importance = use_importance
        if use_resize:
            self.small_x = self.resize_init_size
            self.small_y = self.resize_init_size
        else:
            self.small_x = image_size
            self.small_y = image_size

        self.use_tanh = use_tanh
        self.use_resize = use_resize
        self.save_ckpts = save_ckpts
        if save_ckpts:
            os.system("mkdir -p {}".format(save_ckpts))

        self.repeat = binary_search_steps >= 10

        # each batch has a different modifier value (see below) to evaluate
        # small_shape = (None,self.small_x,self.small_y,num_channels)
        shape = (None, image_size, image_size, num_channels)
        single_shape = (image_size, image_size, num_channels)
        small_single_shape = (self.small_x, self.small_y, num_channels)

        # the variable we're going to optimize over
        # support multiple batches
        # support any size image, will be resized to model native size
        if self.use_resize:
            self.modifier = tf.placeholder(tf.float32,
                                           shape=(None, None, None, None))
            # scaled up image
            self.scaled_modifier = tf.image.resize_images(
                self.modifier, [image_size, image_size])
            # operator used for resizing image
            self.resize_size_x = tf.placeholder(tf.int32)
            self.resize_size_y = tf.placeholder(tf.int32)
            self.resize_input = tf.placeholder(tf.float32,
                                               shape=(1, None, None, None))
            self.resize_op = tf.image.resize_images(
                self.resize_input, [self.resize_size_x, self.resize_size_y])
        else:
            self.modifier = tf.placeholder(tf.float32,
                                           shape=(None, image_size, image_size,
                                                  num_channels))
            # no resize
            self.scaled_modifier = self.modifier
        # the real variable, initialized to 0
        self.load_checkpoint = load_checkpoint
        if load_checkpoint:
            # if checkpoint is incorrect reshape will fail
            print("Using checkpint", load_checkpoint)
            self.real_modifier = np.load(load_checkpoint).reshape(
                (1, ) + small_single_shape)
        else:
            self.real_modifier = np.zeros((1, ) + small_single_shape,
                                          dtype=np.float32)
        # self.real_modifier = np.random.randn(image_size * image_size * num_channels).astype(np.float32).reshape((1,) + single_shape)
        # self.real_modifier /= np.linalg.norm(self.real_modifier)
        # these are variables to be more efficient in sending data to tf
        # we only work on 1 image at once; the batch is for evaluation loss at different modifiers
        self.timg = tf.Variable(np.zeros(single_shape), dtype=tf.float32)
        self.tlab = tf.Variable(np.zeros(num_labels), dtype=tf.float32)
        self.const = tf.Variable(0.0, dtype=tf.float32)

        # and here's what we use to assign them
        self.assign_timg = tf.placeholder(tf.float32, single_shape)
        self.assign_tlab = tf.placeholder(tf.float32, num_labels)
        self.assign_const = tf.placeholder(tf.float32)

        # the resulting image, tanh'd to keep bounded from -0.5 to 0.5
        # broadcast self.timg to every dimension of modifier
        if use_tanh:
            self.newimg = tf.tanh(self.scaled_modifier + self.timg) / 2
        else:
            self.newimg = self.scaled_modifier + self.timg

        # prediction BEFORE-SOFTMAX of the model
        # now we have output at #batch_size different modifiers
        # the output should have shape (batch_size, num_labels)
        self.output = model.predict(self.newimg)

        # distance to the input data
        if use_tanh:
            self.l2dist = tf.reduce_sum(
                tf.square(self.newimg - tf.tanh(self.timg) / 2), [1, 2, 3])
        else:
            self.l2dist = tf.reduce_sum(tf.square(self.newimg - self.timg),
                                        [1, 2, 3])

        # compute the probability of the label class versus the maximum other
        # self.tlab * self.output selects the Z value of the real class,
        # because self.tlab is a one-hot vector;
        # the reduce_sum removes the extra zeros, giving a vector of size #batch_size
        self.real = tf.reduce_sum((self.tlab) * self.output, 1)
        # (1-self.tlab)*self.output gets all Z values for the other classes.
        # Because soft Z values can be negative, it is possible that all Z
        # values are less than 0 and we mistakenly select the real class as
        # the max, so we subtract 10000 from the real class's Z value.
        self.other = tf.reduce_max(
            (1 - self.tlab) * self.output - (self.tlab * 10000), 1)

        # If self.TARGETED is true, then the targets represent the target labels.
        # If self.TARGETED is false, then the targets are the original class labels.
        if self.TARGETED:
            if use_log:
                # loss1 = - tf.log(self.real)
                loss1 = tf.maximum(
                    0.0,
                    tf.log(self.other + 1e-30) - tf.log(self.real + 1e-30))
            else:
                # if targetted, optimize for making the other class (real) most likely
                loss1 = tf.maximum(0.0,
                                   self.other - self.real + self.CONFIDENCE)
        else:
            if use_log:
                # loss1 = tf.log(self.real)
                loss1 = tf.maximum(
                    0.0,
                    tf.log(self.real + 1e-30) - tf.log(self.other + 1e-30))
            else:
                # if untargeted, optimize for making this class least likely.
                loss1 = tf.maximum(0.0,
                                   self.real - self.other + self.CONFIDENCE)

        # sum up the losses (output is a vector of #batch_size)
        self.loss2 = self.l2dist
        self.loss1 = self.const * loss1
        self.loss = self.loss1 + self.loss2

        # these are the variables to initialize when we run
        self.setup = []
        self.setup.append(self.timg.assign(self.assign_timg))
        self.setup.append(self.tlab.assign(self.assign_tlab))
        self.setup.append(self.const.assign(self.assign_const))

        # prepare the list of all valid variables
        var_size = self.small_x * self.small_y * num_channels
        self.use_var_len = var_size
        self.var_list = np.array(range(0, self.use_var_len), dtype=np.int32)
        self.used_var_list = np.zeros(var_size, dtype=np.int32)
        self.sample_prob = np.ones(var_size, dtype=np.float32) / var_size

        # upper and lower bounds for the modifier
        self.modifier_up = np.zeros(var_size, dtype=np.float32)
        self.modifier_down = np.zeros(var_size, dtype=np.float32)

        # random permutation for coordinate update
        self.perm = np.random.permutation(var_size)
        self.perm_index = 0

        # ADAM status
        self.mt = np.zeros(var_size, dtype=np.float32)
        self.vt = np.zeros(var_size, dtype=np.float32)
        # self.beta1 = 0.8
        # self.beta2 = 0.99
        self.beta1 = adam_beta1
        self.beta2 = adam_beta2
        self.reset_adam_after_found = reset_adam_after_found
        self.adam_epoch = np.ones(var_size, dtype=np.int32)
        self.stage = 0
        # variables used during optimization process
        self.grad = np.zeros(batch_size, dtype=np.float32)
        self.hess = np.zeros(batch_size, dtype=np.float32)
        # for testing
        self.grad_op = tf.gradients(self.loss, self.modifier)
        # compile numba function
        # self.coordinate_ADAM_numba = jit(coordinate_ADAM, nopython = True)
        # self.coordinate_ADAM_numba.recompile()
        # print(self.coordinate_ADAM_numba.inspect_llvm())
        # np.set_printoptions(threshold=np.nan)
        # set solver
        solver = solver.lower()
        self.solver_name = solver
        if solver == "adam":
            self.solver = coordinate_ADAM
        elif solver == "newton":
            self.solver = coordinate_Newton
        elif solver == "adam_newton":
            self.solver = coordinate_Newton_ADAM
        elif solver != "fake_zero":
            print("unknown solver", solver)
            self.solver = coordinate_ADAM
        print("Using", solver, "solver")
Exemplo n.º 24
0
def get_stage_1(dof_feat, simmat_feat, is_training, bn_decay=None):
    batch_size = dof_feat.get_shape()[0].value

    #task1: key_point
    feat1 = tf_util.conv1d(dof_feat,
                           128,
                           1,
                           padding='VALID',
                           activation_fn=None,
                           scope='stage1/task1/fc1',
                           bn_decay=bn_decay)
    pred_labels_key_p = tf_util.conv1d(feat1,
                                       2,
                                       1,
                                       padding='VALID',
                                       activation_fn=None,
                                       scope='stage1/task1/fc2',
                                       bn_decay=bn_decay)

    #task2_1: labels_direction
    feat2_1 = tf_util.conv1d(dof_feat,
                             128,
                             1,
                             padding='VALID',
                             activation_fn=None,
                             scope='stage1/task2_1/fc1',
                             bn_decay=bn_decay)
    pred_labels_direction = tf_util.conv1d(feat2_1,
                                           15,
                                           1,
                                           padding='VALID',
                                           activation_fn=None,
                                           scope='stage1/task2_1/fc2',
                                           bn_decay=bn_decay)

    #task2_2: regression_direction
    feat2_2 = tf_util.conv1d(dof_feat,
                             128,
                             1,
                             padding='VALID',
                             activation_fn=None,
                             scope='stage1/task2_2/fc1',
                             bn_decay=bn_decay)
    pred_regression_direction = tf_util.conv1d(feat2_2,
                                               3,
                                               1,
                                               padding='VALID',
                                               activation_fn=None,
                                               scope='stage1/task2_2/fc2',
                                               bn_decay=bn_decay)

    #task_3: position
    feat3 = tf_util.conv1d(dof_feat,
                           128,
                           1,
                           padding='VALID',
                           activation_fn=None,
                           scope='stage1/task3/fc1',
                           bn_decay=bn_decay)
    pred_regression_position = tf_util.conv1d(feat3,
                                              3,
                                              1,
                                              padding='VALID',
                                              activation_fn=None,
                                              scope='stage1/task3/fc2',
                                              bn_decay=bn_decay)

    #task_4: dof_type
    feat4 = tf_util.conv1d(dof_feat,
                           128,
                           1,
                           padding='VALID',
                           activation_fn=None,
                           scope='stage1/task4/fc1',
                           bn_decay=bn_decay)
    pred_labels_type = tf_util.conv1d(feat4,
                                      4,
                                      1,
                                      padding='VALID',
                                      activation_fn=None,
                                      scope='stage1/task4/fc2',
                                      bn_decay=bn_decay)

    #task_5: similar matrix
    feat5 = tf_util.conv1d(simmat_feat,
                           128,
                           1,
                           padding='VALID',
                           activation_fn=None,
                           scope='stage1/task_5/fc1',
                           bn_decay=bn_decay)
    r = tf.reduce_sum(feat5 * feat5, 2)
    r = tf.reshape(r, [batch_size, -1, 1])
    D = r - 2 * tf.matmul(feat5, tf.transpose(
        feat5, perm=[0, 2, 1])) + tf.transpose(r, perm=[0, 2, 1])
    pred_simmat = tf.maximum(10 * D, 0.)

    #task_6: confidence map
    feat6 = tf_util.conv1d(simmat_feat,
                           128,
                           1,
                           padding='VALID',
                           activation_fn=None,
                           scope='stage1/task6/fc1',
                           bn_decay=bn_decay)
    conf_logits = tf_util.conv1d(feat6,
                                 1,
                                 1,
                                 padding='VALID',
                                 activation_fn=None,
                                 scope='stage1/task_6/fc2',
                                 bn_decay=bn_decay)
    pred_conf_logits = tf.nn.sigmoid(conf_logits,
                                     name='stage1/task_6/confidence')

    return pred_labels_key_p,pred_labels_direction,pred_regression_direction,pred_regression_position, \
                                             pred_labels_type,pred_simmat,pred_conf_logits
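The `pred_simmat` head above relies on the pairwise squared-distance identity ||f_i - f_j||^2 = ||f_i||^2 - 2 f_i . f_j + ||f_j||^2; a standalone sketch (the function name is illustrative):

import tensorflow as tf

def pairwise_sq_dist(feat):
    # feat: [batch, num_points, dim] -> [batch, num_points, num_points]
    r = tf.reduce_sum(feat * feat, 2)        # squared norms, [batch, n]
    r = tf.expand_dims(r, -1)                # [batch, n, 1]
    return r - 2 * tf.matmul(feat, feat, transpose_b=True) + \
        tf.transpose(r, perm=[0, 2, 1])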
Exemplo n.º 25
0
    def train_rgvn(self, perf_metric):
        """Trains DVRL based on the specified objective function.

        Args:
          perf_metric: 'auc', 'accuracy', 'log-loss' for classification
                       'mae', 'mse', 'rmspe' for regression
        """

        # Generates selected probability
        est_data_value = self.rpm()

        # Generator loss (REINFORCE algorithm)
        prob = tf.reduce_sum(
            self.s_input * tf.log(est_data_value + self.epsilon) +
            (1 - self.s_input) * tf.log(1 - est_data_value + self.epsilon))
        dve_loss = (-self.reward_input * prob) + \
                   1e3 * (tf.maximum(tf.reduce_mean(est_data_value)
                                     - self.threshold, 0) +
                          tf.maximum((1 - self.threshold) -
                                     tf.reduce_mean(est_data_value), 0))

        # Variable
        dve_vars = [
            v for v in tf.trainable_variables()
            if v.name.startswith('data_value_estimator')
        ]

        # Solver
        dve_solver = tf.train.AdamOptimizer(self.learning_rate).minimize(
            dve_loss, var_list=dve_vars)

        LogUtil.log('INFO', "To evaluate x_valid with ori model!")
        # Baseline performance
        print(self.ori_model_path)
        y_valid_hat = eval_sgcn_prediction(self.x_valid, window=4, model_path=self.ori_model_path, \
                                           gpu_id=0, y_test=self.y_valid, predict_batch_size=self.batch_size_predictor)

        if perf_metric == 'auc':
            # valid_perf = metrics.roc_auc_score(self.y_valid, y_valid_hat[:, 1])
            valid_perf = metrics.roc_auc_score(self.y_valid_onehot,
                                               y_valid_hat)
        elif perf_metric == 'accuracy':
            valid_perf = metrics.accuracy_score(self.y_valid,
                                                np.argmax(y_valid_hat, axis=1))
        elif perf_metric == 'log_loss':
            valid_perf = -metrics.log_loss(self.y_valid, y_valid_hat)
        elif perf_metric == 'rmspe':
            valid_perf = rgvn_metrics.rmspe(self.y_valid, y_valid_hat)
        elif perf_metric == 'mae':
            valid_perf = metrics.mean_absolute_error(self.y_valid, y_valid_hat)
        elif perf_metric == 'mse':
            valid_perf = metrics.mean_squared_error(self.y_valid, y_valid_hat)

        LogUtil.log('INFO', "To evaluate x_train with val model!")
        # Prediction differences
        y_train_valid_pred = eval_sgcn_prediction(
            self.x_train,
            window=4,
            model_path=self.val_model_path,
            gpu_id=0,
            y_test=self.y_train,
            predict_batch_size=self.batch_size_predictor)

        if self.problem == 'classification':
            y_pred_diff = np.abs(self.y_train_onehot - y_train_valid_pred)
        elif self.problem == 'regression':
            y_pred_diff = \
                np.abs(self.y_train_onehot - y_train_valid_pred) / \
                self.y_train_onehot

        # Disable GPU usage
        # os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
        # Main session
        session_conf = tf.ConfigProto(allow_soft_placement=False,
                                      log_device_placement=False)
        sess = tf.Session(config=session_conf)
        sess.run(tf.global_variables_initializer())
        # Model save at the end
        saver = tf.train.Saver(dve_vars)

        for _ in tqdm.tqdm(range(self.outer_iterations)):
            # Batch selection
            batch_idx = np.random.permutation(
                len(self.x_train))[:self.batch_size]
            x_batch = self.x_train[batch_idx]
            y_batch_onehot = self.y_train_onehot[batch_idx]
            y_batch = self.y_train[batch_idx]
            y_hat_batch = y_pred_diff[batch_idx]

            LogUtil.log('INFO', 'Building the training data slice')
            x_train_class = SGCNData(self.x_train, self.y_train, 4)
            alias_inputs, A, items, node_masks, targets = x_train_class.get_slice(
                batch_idx)

            LogUtil.log('INFO', 'Start to generate selection probability')
            # Generates selection probability
            print(x_batch)
            print(items)
            print(A)
            print(y_batch_onehot)
            est_dv_curr = sess.run(
                est_data_value,
                feed_dict={
                    self.A: A,
                    # Liu Chenxu add
                    #self.x_input: x_batch,
                    self.items: items,
                    self.node_masks: node_masks,
                    self.y_input: y_batch_onehot,
                    self.y_hat_input: y_hat_batch
                })
            LogUtil.log('INFO', 'Finished generating selection probability')
            # Samples the selection probability
            sel_prob_curr = np.random.binomial(1, est_dv_curr,
                                               est_dv_curr.shape)

            # Exception (When selection probability is 0)
            if np.sum(sel_prob_curr) == 0:
                est_dv_curr = 0.5 * np.ones(np.shape(est_dv_curr))
                sel_prob_curr = np.random.binomial(1, est_dv_curr,
                                                   est_dv_curr.shape)

            # Trains predictor
            flatten_sel_prob_curr = sel_prob_curr.flatten()
            weighted_x_batch = x_batch[np.where(flatten_sel_prob_curr > 0)]
            weighted_y_batch = y_batch[np.where(flatten_sel_prob_curr > 0)]
            LogUtil.log('INFO', "Start to train new model.")
            # new_model_batch_size = len(weighted_x_batch)
            new_model_path = train_sgcn(self.hidden_dim, self.label_dim, self.n_nodes, 1, weighted_x_batch,
                                        weighted_y_batch, 50, \
                                        'tmp/sgcn_as_predict_new_model', step_save_model=8, lr=0.001,
                                        epoch=self.inner_iterations)
            LogUtil.log('INFO', "New model training done.")
            LogUtil.log('INFO', new_model_path)
            # Prediction
            y_valid_hat = eval_sgcn_prediction(self.x_valid, window=4, model_path=new_model_path, \
                                               gpu_id=0, y_test=self.y_valid,
                                               predict_batch_size=self.batch_size_predictor)
            LogUtil.log('INFO', "Evaluate with new model done.")
            # Reward computation
            if perf_metric == 'auc':
                rgvn_perf = metrics.roc_auc_score(
                    # self.y_valid, y_valid_hat[:, 1])
                    self.y_valid_onehot,
                    y_valid_hat)
            elif perf_metric == 'accuracy':
                rgvn_perf = metrics.accuracy_score(
                    self.y_valid, np.argmax(y_valid_hat, axis=1))
            elif perf_metric == 'log_loss':
                rgvn_perf = -metrics.log_loss(self.y_valid, y_valid_hat)
            elif perf_metric == 'rmspe':
                rgvn_perf = rgvn_metrics.rmspe(self.y_valid, y_valid_hat)
            elif perf_metric == 'mae':
                rgvn_perf = metrics.mean_absolute_error(
                    self.y_valid, y_valid_hat)
            elif perf_metric == 'mse':
                rgvn_perf = metrics.mean_squared_error(self.y_valid,
                                                       y_valid_hat)

            if self.problem == 'classification':
                reward_curr = rgvn_perf - valid_perf
            elif self.problem == 'regression':
                reward_curr = valid_perf - rgvn_perf

            LogUtil.log('INFO', 'Start to train the generator')
            # Trains the generator
            _, _ = sess.run(
                [dve_solver, dve_loss],
                feed_dict={
                    self.A: A,
                    self.items: items,
                    self.node_masks: node_masks,
                    self.y_input: y_batch_onehot,
                    self.y_hat_input: y_hat_batch,
                    self.s_input: sel_prob_curr,
                    self.reward_input: reward_curr
                })
            LogUtil.log('INFO', 'Finished training the generator')
        # Saves trained model
        saver.save(sess, self.checkpoint_file_name)
        LogUtil.log('INFO', "Saved trained rgvn model.")
Exemplo n.º 26
0
def get_stage_1_loss(pred_labels_key_p,pred_labels_direction,pred_regression_direction,pred_regression_position, \
                       pred_labels_type,labels_key_p,labels_direction,regression_direction,regression_position,labels_type,\
                       simmat_pl,neg_simmat_pl,pred_simmat,pred_conf_logits):
    batch_size = pred_labels_key_p.get_shape()[0].value
    num_point = pred_labels_key_p.get_shape()[1].value
    mask = tf.cast(labels_key_p, tf.float32)
    neg_mask = tf.ones_like(mask) - mask
    Np = tf.expand_dims(tf.reduce_sum(mask, axis=1), 1)
    Ng = tf.expand_dims(tf.reduce_sum(neg_mask, axis=1), 1)
    all_mask = tf.ones_like(mask)
    #loss:task1
    task_1_loss = tf.reduce_mean(
        tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=pred_labels_key_p, labels=labels_key_p) * (mask *
                                                              (Ng / Np) + 1))
    task_1_recall = tf.reduce_mean(tf.reduce_sum(tf.cast(tf.equal(tf.argmax(pred_labels_key_p,axis=2,output_type = tf.int32),\
                          labels_key_p),tf.float32)*mask,axis = 1)/tf.reduce_sum(mask,axis=1))
    task_1_acc = tf.reduce_mean(tf.reduce_sum(tf.cast(tf.equal(tf.argmax(pred_labels_key_p,axis=2,output_type = tf.int32),\
                          labels_key_p),tf.float32),axis = 1)/num_point)
    #loss:task2_1
    task_2_1_loss =  tf.reduce_mean(tf.reduce_sum(tf.nn.sparse_softmax_cross_entropy_with_logits(logits = pred_labels_direction,\
                               labels = labels_direction)*mask,axis = 1)/tf.reduce_sum(mask,axis=1))
    task_2_1_acc = tf.reduce_mean(tf.reduce_sum(tf.cast(tf.equal(tf.argmax(pred_labels_direction,axis=2,output_type=tf.int32), \
                               labels_direction),tf.float32)*mask,axis=1)/tf.reduce_sum(mask,axis=1))
    #loss:task2_2
    task_2_2_loss = tf.reduce_mean(tf.reduce_sum(tf.reduce_mean(smooth_l1_dist(pred_regression_direction-regression_direction),axis=2)*mask, \
                               axis = 1)/tf.reduce_sum(mask,axis=1))
    #loss:task3
    task_3_loss = tf.reduce_mean(tf.reduce_sum(tf.reduce_mean(smooth_l1_dist(pred_regression_position-regression_position),axis=2)*mask, \
                               axis = 1)/tf.reduce_sum(mask,axis=1))
    #loss:task4
    task_4_loss = tf.reduce_mean(
        tf.reduce_sum(tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=pred_labels_type, labels=labels_type) * mask,
                      axis=1) / tf.reduce_sum(mask, axis=1))
    task_4_acc = tf.reduce_mean(tf.reduce_sum(tf.cast(tf.equal(tf.argmax(pred_labels_type,axis=2,output_type = tf.int32),\
                          labels_type),tf.float32)*mask,axis = 1)/tf.reduce_sum(mask,axis=1))

    #loss: task_5
    pos = pred_simmat * simmat_pl
    neg = tf.maximum(80 - pred_simmat, 0) * neg_simmat_pl
    task_5_loss = tf.reduce_mean(pos + neg)
    #loss: task_6
    ng_label = tf.greater(simmat_pl, 0.5)
    ng = tf.less(pred_simmat, 80)
    epsilon = tf.constant(
        np.ones(ng_label.get_shape()[:2]).astype(np.float32) * 1e-6)
    pts_iou = tf.reduce_sum(tf.cast(tf.logical_and(ng, ng_label), tf.float32), axis=2) / \
                      (tf.reduce_sum(tf.cast(tf.logical_or(ng, ng_label), tf.float32), axis=2) + epsilon)
    task_6_loss = tf.reduce_mean(
        tf.squared_difference(pts_iou, tf.squeeze(pred_conf_logits, [2])))
    w1 = 1
    w2_1 = 1
    w2_2 = 100
    w3 = 100
    w4 = 1
    w5 = 1
    w6 = 100

    loss = task_1_loss * w1 + task_2_1_loss * w2_1 + task_2_2_loss * w2_2 + task_3_loss * w3 + task_4_loss * w4 + task_5_loss * w5 + task_6_loss * w6

    tf.summary.scalar('all loss', loss)
    tf.add_to_collection('losses', loss)
    return task_1_loss, task_1_recall, task_1_acc, task_2_1_loss, task_2_1_acc, task_2_2_loss, task_3_loss, task_4_loss, task_4_acc, task_5_loss, task_6_loss, loss
Exemplo n.º 27
0
def loss_fn(features, mode, params):
    """Computes the training loss for depth and egomotion training.

  This function is written with TPU-friendliness in mind.

  Args:
    features: A dictionary mapping strings to tuples of (tf.Tensor, tf.Tensor),
      representing pairs of frames. The loss will be calculated from these
      tensors. The expected endpoints are 'rgb', 'depth', 'intrinsics_mat'
      and 'intrinsics_mat_inv'.
    mode: One of tf.estimator.ModeKeys: TRAIN, PREDICT or EVAL.
    params: A dictionary with hyperparameters that optionally override
      DEFAULT_PARAMS above.

  Returns:
    A dictionary mapping each loss name (see DEFAULT_PARAMS['loss_weights']'s
    keys) to a scalar tf.Tensor representing the respective loss, including
    the total training loss.

  Raises:
    ValueError: If `features` endpoints don't conform to their expected
      structure.
  """
    params = parameter_container.ParameterContainer.from_defaults_and_overrides(
        DEFAULT_PARAMS, params, is_strict=True, strictness_depth=2)

    if len(features['rgb']) != 2 or 'depth' in features and len(
            features['depth']) != 2:
        raise ValueError(
            'RGB and depth endpoints are expected to be a tuple of two'
            ' tensors. Rather, they are %s.' % str(features))

    # On TPU we strive to stack tensors together and perform ops once on the
    # entire stack, to save time and HBM memory. We thus stack the batch-of-
    # first-frames and the batch-of-second-frames, for both depth and RGB. The
    # batch dimension of rgb_stack and gt_depth_stack is thus twice the
    # original batch size.
    rgb_stack = tf.concat(features['rgb'], axis=0)

    depth_predictor = depth_prediction_nets.ResNet18DepthPredictor(
        mode, params.depth_predictor_params.as_dict())
    predicted_depth = depth_predictor.predict_depth(rgb_stack)
    maybe_summary.histogram('PredictedDepth', predicted_depth)

    endpoints = {}
    endpoints['predicted_depth'] = tf.split(predicted_depth, 2, axis=0)
    endpoints['rgb'] = features['rgb']

    # We use the heuristic that depths of less than 0.2 meters are not
    # accurate. This is a rough placeholder for a confidence map that we're
    # going to have in the future.
    if 'depth' in features:
        endpoints['groundtruth_depth'] = features['depth']

    if params.cascade:
        motion_features = [
            tf.concat([features['rgb'][0], endpoints['predicted_depth'][0]],
                      axis=-1),
            tf.concat([features['rgb'][1], endpoints['predicted_depth'][1]],
                      axis=-1)
        ]
    else:
        motion_features = features['rgb']

    motion_features_stack = tf.concat(motion_features, axis=0)
    flipped_motion_features_stack = tf.concat(motion_features[::-1], axis=0)
    # Unlike `rgb_stack`, here we stacked the frames in reverse order along
    # the batch dimension. By concatenating the two stacks below along the
    # channel axis, we create the following tensor:
    #
    #         Channel dimension (3)
    #   _                                 _
    #  |  Frame1-s batch | Frame2-s batch  |____Batch
    #  |_ Frame2-s batch | Frame1-s batch _|    dimension (0)
    #
    # When we send this tensor to the motion prediction network, the first and
    # second halves of the result represent the camera motion from Frame1 to
    # Frame2 and from Frame2 to Frame1 respectively. Further below we impose a
    # loss that drives these two to be the inverses of one another
    # (cycle-consistency).
    pairs = tf.concat([motion_features_stack, flipped_motion_features_stack],
                      axis=-1)

    rot, trans, residual_translation, intrinsics_mat = (
        object_motion_nets.motion_field_net(
            images=pairs,
            weight_reg=params.motion_prediction_params.weight_reg,
            align_corners=params.motion_prediction_params.align_corners,
            auto_mask=params.motion_prediction_params.auto_mask))

    if params.motion_field_burnin_steps > 0.0:
        step = tf.to_float(tf.train.get_or_create_global_step())
        burnin_steps = tf.to_float(params.motion_field_burnin_steps)
        residual_translation *= tf.clip_by_value(2 * step / burnin_steps - 1,
                                                 0.0, 1.0)

    # If using ground truth egomotion
    if not params.learn_egomotion:
        egomotion_mat = tf.concat(features['egomotion_mat'], axis=0)
        rot = transform_utils.angles_from_matrix(egomotion_mat[:, :3, :3])
        trans = egomotion_mat[:, :3, 3]
        trans = tf.expand_dims(trans, 1)
        trans = tf.expand_dims(trans, 1)

    if params.use_mask:
        mask = tf.to_float(tf.concat(features['mask'], axis=0) > 0)
        if params.foreground_dilation > 0:
            pool_size = params.foreground_dilation * 2 + 1
            mask = tf.nn.max_pool(mask, [1, pool_size, pool_size, 1], [1] * 4,
                                  'SAME')
        residual_translation *= mask

    maybe_summary.histogram('ResidualTranslation', residual_translation)
    maybe_summary.histogram('BackgroundTranslation', trans)
    maybe_summary.histogram('Rotation', rot)
    endpoints['residual_translation'] = tf.split(residual_translation,
                                                 2,
                                                 axis=0)
    endpoints['background_translation'] = tf.split(trans, 2, axis=0)
    endpoints['rotation'] = tf.split(rot, 2, axis=0)

    if not params.learn_intrinsics.enabled:
        endpoints['intrinsics_mat'] = features['intrinsics_mat']
        endpoints['intrinsics_mat_inv'] = features['intrinsics_mat_inv']
    elif params.learn_intrinsics.per_video:
        int_mat = intrinsics_utils.create_and_fetch_intrinsics_per_video_index(
            features['video_index'][0],
            params.image_preprocessing.image_height,
            params.image_preprocessing.image_width,
            max_video_index=params.learn_intrinsics.max_number_of_videos)
        endpoints['intrinsics_mat'] = tf.concat([int_mat] * 2, axis=0)
        endpoints['intrinsics_mat_inv'] = (
            intrinsics_utils.invert_intrinsics_matrix(int_mat))
    else:
        # The intrinsic matrix should be the same regardless of the order of
        # the two images: the prediction from the (Frame1, Frame2) pair should
        # equal the one from the (Frame2, Frame1) pair. It would probably be
        # better to enforce this with a loss, but for now we simply average
        # the two predictions.
        intrinsics_mat = 0.5 * sum(tf.split(intrinsics_mat, 2, axis=0))
        endpoints['intrinsics_mat'] = [intrinsics_mat] * 2
        endpoints['intrinsics_mat_inv'] = [
            intrinsics_utils.invert_intrinsics_matrix(intrinsics_mat)
        ] * 2

    aggregator = loss_aggregator.DepthMotionFieldLossAggregator(
        endpoints, params.loss_weights.as_dict(), params.loss_params.as_dict())

    # Add some more summaries.
    maybe_summary.image('rgb0', features['rgb'][0])
    maybe_summary.image('rgb1', features['rgb'][1])
    disp0, disp1 = tf.split(aggregator.output_endpoints['disparity'],
                            2,
                            axis=0)
    maybe_summary.image('disparity0/grayscale', disp0)
    maybe_summary.image_with_colormap('disparity0/plasma',
                                      tf.squeeze(disp0, axis=3), 'plasma', 0.0)
    maybe_summary.image('disparity1/grayscale', disp1)
    maybe_summary.image_with_colormap('disparity1/plasma',
                                      tf.squeeze(disp1, axis=3), 'plasma', 0.0)
    if maybe_summary.summaries_enabled():
        if 'depth' in features:
            gt_disp0 = 1.0 / tf.maximum(features['depth'][0], 0.5)
            gt_disp1 = 1.0 / tf.maximum(features['depth'][1], 0.5)
            maybe_summary.image('disparity_gt0', gt_disp0)
            maybe_summary.image('disparity_gt1', gt_disp1)

        depth_proximity_weight0, depth_proximity_weight1 = tf.split(
            aggregator.output_endpoints['depth_proximity_weight'], 2, axis=0)
        maybe_summary.image('consistency_weight0',
                            tf.expand_dims(depth_proximity_weight0, -1))
        maybe_summary.image('consistency_weight1',
                            tf.expand_dims(depth_proximity_weight1, -1))
        maybe_summary.image('trans', aggregator.output_endpoints['trans'])
        maybe_summary.image('trans_inv',
                            aggregator.output_endpoints['inv_trans'])
        maybe_summary.image('trans_res', endpoints['residual_translation'][0])
        maybe_summary.image('trans_res_inv',
                            endpoints['residual_translation'][1])

    return aggregator.losses
Exemplo n.º 28
0
def resize_and_crop_image_v2(image,
                             short_side,
                             long_side,
                             padded_size,
                             aug_scale_min=1.0,
                             aug_scale_max=1.0,
                             seed=1,
                             method=tf.image.ResizeMethod.BILINEAR):
    """Resizes the input image to output size (Faster R-CNN style).

  Resize and pad images given the specified short / long side lengths and the
  padded output size.

  Here are the preprocessing steps.
  1. For a given image, keep its aspect ratio and first try to rescale the
     short side of the original image to `short_side`.
  2. If the image scaled in step 1 has a long side that exceeds `long_side`,
     keep the aspect ratio and instead rescale the long side to `long_side`.
  3. Pad the rescaled image to `padded_size`.
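
  For example (hypothetical values, for illustration only): with
  short_side=800 and long_side=1333, a 600x900 image is scaled by 800/600 to
  800x1200, which fits within `long_side` and is kept; a 400x1000 image would
  first scale to 800x2000, exceeding 1333, so the long-side scale 1333/1000
  is used instead, yielding roughly 533x1333 before padding.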

  Args:
    image: a `Tensor` of shape [height, width, 3] representing an image.
    short_side: a scalar `Tensor` or `int` representing the desired short side
      to be rescaled to.
    long_side: a scalar `Tensor` or `int` representing the desired long side to
      be rescaled to.
    padded_size: a `Tensor` or `int` list/tuple of two elements representing
      [height, width] of the padded output image size. Padding is applied
      after scaling the image to the desired size.
    aug_scale_min: a `float` in the range [0, 1.0] giving the minimum random
      scale applied to the scaled size for training scale jittering.
    aug_scale_max: a `float` in the range [1.0, inf) giving the maximum random
      scale applied to the scaled size for training scale jittering.
    seed: seed for random scale jittering.
    method: function to resize input image to scaled image.

  Returns:
    output_image: `Tensor` of shape [height, width, 3] where [height, width]
      equals `padded_size`.
    image_info: a 2D `Tensor` that encodes the information of the image and the
      applied preprocessing. It is in the format of
      [[original_height, original_width], [desired_height, desired_width],
       [y_scale, x_scale], [y_offset, x_offset]], where [desired_height,
      desired_width] is the actual scaled image size, and [y_scale, x_scale] is
      the scaling factor, which is the ratio of
      scaled dimension / original dimension.
  """
    with tf.name_scope('resize_and_crop_image_v2'):
        image_size = tf.cast(tf.shape(image)[0:2], tf.float32)

        scale_using_short_side = (short_side /
                                  tf.minimum(image_size[0], image_size[1]))
        scale_using_long_side = (long_side /
                                 tf.maximum(image_size[0], image_size[1]))

        scaled_size = tf.round(image_size * scale_using_short_side)
        scaled_size = tf.where(
            tf.greater(tf.maximum(scaled_size[0], scaled_size[1]), long_side),
            tf.round(image_size * scale_using_long_side), scaled_size)
        desired_size = scaled_size

        random_jittering = (aug_scale_min != 1.0 or aug_scale_max != 1.0)

        if random_jittering:
            random_scale = tf.random_uniform([],
                                             aug_scale_min,
                                             aug_scale_max,
                                             seed=seed)
            scaled_size = tf.round(random_scale * scaled_size)

        # Computes 2D image_scale.
        image_scale = scaled_size / image_size

        # Selects non-zero random offset (x, y) if scaled image is larger than
        # desired_size.
        if random_jittering:
            max_offset = scaled_size - desired_size
            max_offset = tf.where(tf.less(max_offset, 0),
                                  tf.zeros_like(max_offset), max_offset)
            offset = max_offset * tf.random_uniform([2], 0, 1, seed=seed)
            offset = tf.cast(offset, tf.int32)
        else:
            offset = tf.zeros((2, ), tf.int32)

        scaled_image = tf.image.resize_images(image,
                                              tf.cast(scaled_size, tf.int32),
                                              method=method)

        if random_jittering:
            # `desired_size` is a float tensor; cast it before arithmetic with
            # the int32 `offset` so the slice bounds are integer tensors.
            desired_size_int = tf.cast(desired_size, tf.int32)
            scaled_image = scaled_image[
                offset[0]:offset[0] + desired_size_int[0],
                offset[1]:offset[1] + desired_size_int[1], :]

        output_image = tf.image.pad_to_bounding_box(scaled_image, 0, 0,
                                                    padded_size[0],
                                                    padded_size[1])

        image_info = tf.stack([
            image_size,
            tf.cast(desired_size, dtype=tf.float32), image_scale,
            tf.cast(offset, tf.float32)
        ])
        return output_image, image_info
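
# A minimal usage sketch (not from the original source; shapes and values are
# hypothetical, assuming a TF1-style `tf`):
# image = tf.zeros([600, 900, 3], tf.float32)
# output_image, image_info = resize_and_crop_image_v2(
#     image, short_side=800, long_side=1333, padded_size=[1344, 1344])
# output_image has shape [1344, 1344, 3]; image_info[1] holds the scaled size
# [800, 1200] and image_info[2] the per-axis scale factors.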
Exemplo n.º 29
0
def aggregate_task_losses(hparams, problem_hparams, logits, feature_name,
                          feature):
    """Multiproblem loss function."""

    # If no reweighting, we want the default loss to mimic the LM loss.
    if not hparams.multiproblem_reweight_label_loss:
        return aggregate_task_lm_losses(hparams=hparams,
                                        problem_hparams=problem_hparams,
                                        logits=logits,
                                        feature_name=feature_name,
                                        feature=feature)

    summaries = []
    main_task_id = hparams.problem.task_list[0].task_id
    vocab_size = problem_hparams.vocab_size[feature_name]
    if vocab_size is not None and hasattr(hparams, "vocab_divisor"):
        vocab_size += (-vocab_size) % hparams.vocab_divisor
    modality = problem_hparams.modality[feature_name]
    loss = hparams.loss.get(feature_name, modalities.get_loss(modality))
    weights_fn = hparams.weights_fn.get(feature_name,
                                        modalities.get_weights_fn(modality))
    # Primary task loss
    loss_num, loss_den = loss(
        logits, feature,
        lambda x: common_layers.weights_multi_problem_all(x, main_task_id),
        hparams, vocab_size, weights_fn)

    loss_val = loss_num / tf.maximum(1.0, loss_den)
    summaries.append([hparams.problem.task_list[0].name + "_loss", loss_val])

    # Since the losses may undergo rescaling, they cannot exist as separate
    # numerators and denominators. Cap the denominators at 1 in order to
    # facilitate loss averaging.
    loss_num = loss_val
    loss_den = tf.minimum(tf.convert_to_tensor(1, dtype=tf.float32), loss_den)
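    # With each task contributing at most 1 to the denominator, the ratio
    # loss_num / loss_den returned below approximates the average of the
    # per-task loss values.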

    for task in hparams.problem.task_list[1:]:
        # Loss only from the input sequence -- the auxiliary LM loss.
        seq_loss_num, seq_loss_den = loss(
            logits,
            feature,
            lambda x: common_layers.weights_multi_problem_input(
                x, task.task_id),  # pylint: disable=cell-var-from-loop
            hparams,
            vocab_size)
        seq_loss_num *= problem_hparams.loss_multiplier

        # Unscaled sequence loss.
        seq_loss = seq_loss_num / tf.maximum(1.0, seq_loss_den)
        summaries.append([task.name + "_seq_loss", seq_loss])

        if hasattr(task, "num_classes"):
            # Loss only from the classification label.
            label_loss_num, label_loss_den = loss(
                logits,
                feature,
                lambda x: common_layers.weights_multi_problem(x, task.task_id),  # pylint: disable=cell-var-from-loop
                hparams,
                vocab_size)
            label_loss_num *= problem_hparams.loss_multiplier

            # Unscaled classification label loss.
            label_loss = label_loss_num / tf.maximum(1.0, label_loss_den)
            summaries.append([task.name + "_label_loss", label_loss])

            # Scaling.
            if hparams.multiproblem_reweight_label_loss:
                label_loss *= hparams.multiproblem_label_weight
                seq_loss *= (1 - hparams.multiproblem_label_weight)

            # This is the training loss for the optimizer after scaling.
            task_loss_val = seq_loss + label_loss

            loss_den_ = label_loss_den

        else:
            # Loss only from the target sequence.
            target_loss_num, target_loss_den = loss(
                logits,
                feature,
                lambda x: common_layers.weights_multi_problem(x, task.task_id),  # pylint: disable=cell-var-from-loop
                hparams,
                vocab_size)
            target_loss_num *= problem_hparams.loss_multiplier

            # Unscaled target sequence loss.
            target_loss = target_loss_num / tf.maximum(1.0, target_loss_den)
            summaries.append([task.name + "_target_loss", target_loss])

            # Scaling.
            if hparams.multiproblem_reweight_label_loss:
                target_loss *= hparams.multiproblem_label_weight
                seq_loss *= (1 - hparams.multiproblem_label_weight)

            # This is the training loss for the optimizer after all the scaling.
            task_loss_val = seq_loss + target_loss

            loss_den_ = target_loss_den

        summaries.append([task.name + "_loss", task_loss_val])
        # Adding 1 to the loss den for each task leads to averaging task losses.
        # TODO(urvashik): Fix combination with other task losses - weighted
        # average based on the number of examples from that task.
        loss_num += task_loss_val
        loss_den += tf.minimum(tf.convert_to_tensor(1, dtype=tf.float32),
                               loss_den_)

    return loss_num, loss_den, summaries
Exemplo n.º 30
0
def multilevel_crop_and_resize(features, boxes, output_size=7):
    """Crop and resize on multilevel feature pyramid.

  Generate the (output_size, output_size) set of pixels for each input box
  by first locating the box into the correct feature level, and then cropping
  and resizing it using the corresponding feature map of that level.

  Args:
    features: A dictionary with key as pyramid level and value as features. The
      features are in shape of [batch_size, height_l, width_l, num_filters].
    boxes: A 3-D Tensor of shape [batch_size, num_boxes, 4]. Each row represents
      a box with [y1, x1, y2, x2] in un-normalized coordinates.
    output_size: A scalar to indicate the output crop size.

  Returns:
    A 5-D tensor representing feature crop of shape
    [batch_size, num_boxes, output_size, output_size, num_filters].
  """

    with tf.name_scope('multilevel_crop_and_resize'):
        levels = list(features.keys())
        min_level = min(levels)
        max_level = max(levels)
        batch_size, max_feature_height, max_feature_width, num_filters = (
            features[min_level].get_shape().as_list())
        _, num_boxes, _ = boxes.get_shape().as_list()

        # Flatten each pyramid level to [batch_size, height_l * width_l,
        # num_filters], concatenate all levels, and reshape into a single
        # [total_pixels, num_filters] tensor (`features_r2`).
        features_all = []
        feature_heights = []
        feature_widths = []
        for level in range(min_level, max_level + 1):
            shape = features[level].get_shape().as_list()
            feature_heights.append(shape[1])
            feature_widths.append(shape[2])
            # Concat tensors of [batch_size, height_l * width_l, num_filters]
            # for each level.
            features_all.append(
                tf.reshape(features[level], [batch_size, -1, num_filters]))
        features_r2 = tf.reshape(tf.concat(features_all, 1),
                                 [-1, num_filters])

        # Calculate height_l * width_l for each level.
        level_dim_sizes = [
            feature_widths[i] * feature_heights[i]
            for i in range(len(feature_widths))
        ]
        # level_dim_offsets is the cumulative sum of level_dim_sizes.
        level_dim_offsets = [0]
        for i in range(len(feature_widths) - 1):
            level_dim_offsets.append(level_dim_offsets[i] + level_dim_sizes[i])
        batch_dim_size = level_dim_offsets[-1] + level_dim_sizes[-1]
        level_dim_offsets = tf.constant(level_dim_offsets, tf.int32)
        height_dim_sizes = tf.constant(feature_widths, tf.int32)
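        # Note: despite its name, `height_dim_sizes` holds each level's row
        # stride (its feature width); multiplying a y index by it converts
        # (y, x) into a flat offset within that level.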

        # Assigns boxes to the right level.
        box_width = boxes[:, :, 3] - boxes[:, :, 1]
        box_height = boxes[:, :, 2] - boxes[:, :, 0]
        areas_sqrt = tf.sqrt(box_height * box_width)
        levels = tf.cast(
            tf.floordiv(tf.log(tf.div(areas_sqrt, 224.0)), tf.log(2.0)) + 4.0,
            dtype=tf.int32)
        # Maps levels between [min_level, max_level].
        levels = tf.minimum(max_level, tf.maximum(levels, min_level))
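        # This is the FPN level-assignment heuristic
        # k = 4 + floor(log2(sqrt(area) / 224)): a box of roughly 224x224
        # pixels maps to level 4, and each halving or doubling of the box
        # scale moves it one level down or up.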

        # Projects box location and sizes to corresponding feature levels.
        scale_to_level = tf.cast(tf.pow(tf.constant(2.0),
                                        tf.cast(levels, tf.float32)),
                                 dtype=boxes.dtype)
        boxes /= tf.expand_dims(scale_to_level, axis=2)
        box_width /= scale_to_level
        box_height /= scale_to_level
        boxes = tf.concat([
            boxes[:, :, 0:2],
            tf.expand_dims(box_height, -1),
            tf.expand_dims(box_width, -1)
        ],
                          axis=-1)

        # Maps levels to [0, max_level-min_level].
        levels -= min_level
        level_strides = tf.pow([[2.0]], tf.cast(levels, tf.float32))
        boundary = tf.cast(
            tf.concat([
                tf.expand_dims([[tf.cast(max_feature_height, tf.float32)]] /
                               level_strides - 1,
                               axis=-1),
                tf.expand_dims([[tf.cast(max_feature_width, tf.float32)]] /
                               level_strides - 1,
                               axis=-1),
            ],
                      axis=-1), boxes.dtype)

        # Compute grid positions.
        kernel_y, kernel_x, box_gridy0y1, box_gridx0x1 = compute_grid_positions(
            boxes, boundary, output_size, sample_offset=0.5)

        x_indices = tf.cast(tf.reshape(
            box_gridx0x1, [batch_size, num_boxes, output_size * 2]),
                            dtype=tf.int32)
        y_indices = tf.cast(tf.reshape(
            box_gridy0y1, [batch_size, num_boxes, output_size * 2]),
                            dtype=tf.int32)

        batch_size_offset = tf.tile(
            tf.reshape(
                tf.range(batch_size) * batch_dim_size, [batch_size, 1, 1, 1]),
            [1, num_boxes, output_size * 2, output_size * 2])
        # Get level offset for each box. Each box belongs to one level.
        levels_offset = tf.tile(
            tf.reshape(tf.gather(level_dim_offsets, levels),
                       [batch_size, num_boxes, 1, 1]),
            [1, 1, output_size * 2, output_size * 2])
        y_indices_offset = tf.tile(
            tf.reshape(
                y_indices *
                tf.expand_dims(tf.gather(height_dim_sizes, levels), -1),
                [batch_size, num_boxes, output_size * 2, 1]),
            [1, 1, 1, output_size * 2])
        x_indices_offset = tf.tile(
            tf.reshape(x_indices, [batch_size, num_boxes, 1, output_size * 2]),
            [1, 1, output_size * 2, 1])
        indices = tf.reshape(
            batch_size_offset + levels_offset + y_indices_offset +
            x_indices_offset, [-1])
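        # Each sampled point's flat index into `features_r2` is therefore
        # batch_offset + level_offset + y * row_width + x, matching the
        # row-major, level-concatenated layout built above.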

        # TODO(wangtao): replace tf.gather with tf.gather_nd and try to get similar
        # performance.
        features_per_box = tf.reshape(tf.gather(features_r2, indices), [
            batch_size, num_boxes, output_size * 2, output_size * 2,
            num_filters
        ])

        # Bilinear interpolation.
        features_per_box = feature_bilinear_interpolation(
            features_per_box, kernel_y, kernel_x)
        return features_per_box
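
# A minimal usage sketch (not from the original source; shapes are
# hypothetical and `feature_bilinear_interpolation` must be in scope):
# features = {l: tf.zeros([2, 256 // 2**(l - 2), 256 // 2**(l - 2), 64])
#             for l in range(2, 6)}  # FPN levels 2..5
# boxes = tf.constant([[[10., 10., 50., 80.]] * 4] * 2)  # [2, 4, 4]
# crops = multilevel_crop_and_resize(features, boxes, output_size=7)
# # crops has shape [2, 4, 7, 7, 64].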