Example #1
 def conv(self, input, k_h, k_w, c_o, s_h, s_w, name, relu=True, padding=DEFAULT_PADDING, group=1, biased=True):
     # Verify that the padding is acceptable
     self.validate_padding(padding)
     # Get the number of channels in the input
     c_i = input.get_shape()[-1]
     # Verify that the grouping parameter is valid
     assert c_i % group == 0
     assert c_o % group == 0
     # Convolution for a given input and kernel
     convolve = lambda i, k: tf.nn.conv2d(i, k, [1, s_h, s_w, 1], padding=padding)
     with tf.variable_scope(name) as scope:
         kernel = self.make_var("weights", shape=[k_h, k_w, c_i // group, c_o])
         if group == 1:
             # This is the common-case. Convolve the input without any further complications.
             output = convolve(input, kernel)
         else:
             # Split the input into groups and then convolve each of them independently
             input_groups = tf.split(3, group, input)
             kernel_groups = tf.split(3, group, kernel)
             output_groups = [convolve(i, k) for i, k in zip(input_groups, kernel_groups)]
             # Concatenate the groups
             output = tf.concat(3, output_groups)
         # Add the biases
         if biased:
             biases = self.make_var("biases", [c_o])
             output = tf.nn.bias_add(output, biases)
         if relu:
             # ReLU non-linearity
             output = tf.nn.relu(output, name=scope.name)
         return output
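The group > 1 branch above implements a grouped convolution: the input channels and the kernel's output channels are both split into `group` slices, each slice is convolved independently, and the results are concatenated back together. A minimal standalone sketch of the same idea, written against the current tf.split/tf.concat argument order and with made-up shapes, might look like this:

import tensorflow as tf

group = 2
x = tf.random.normal([1, 8, 8, 4])          # hypothetical NHWC input with 4 channels
kernel = tf.random.normal([3, 3, 2, 6])     # 4 / group = 2 input channels per slice, 6 output channels

x_groups = tf.split(x, group, axis=3)
k_groups = tf.split(kernel, group, axis=3)  # 6 output channels -> 3 per slice
y_groups = [tf.nn.conv2d(xg, kg, strides=[1, 1, 1, 1], padding="SAME")
            for xg, kg in zip(x_groups, k_groups)]
y = tf.concat(y_groups, axis=3)             # shape [1, 8, 8, 6]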
Example #2
def compute_IOU(bboxA, bboxB):
    """Compute the Intersection Over Union.
    Args:
        bboxA: [N X 4 tensor] format = [left, top, right, bottom]
        bboxB: [N X 4 tensor] 

    Return:
        IOU: [N X 1 tensor]
    """

    x1A, y1A, x2A, y2A = tf.split(1, 4, bboxA)
    x1B, y1B, x2B, y2B = tf.split(1, 4, bboxB)

    # compute intersection
    x1_max = tf.maximum(x1A, x1B)
    y1_max = tf.maximum(y1A, y1B)
    x2_min = tf.minimum(x2A, x2B)
    y2_min = tf.minimum(y2A, y2B)

    # overlap_flag = tf.logical_and( tf.less(x1_max, x2_min), tf.less(y1_max, y2_min))

    overlap_flag = tf.to_float(tf.less(x1_max, x2_min)) * \
        tf.to_float(tf.less(y1_max, y2_min))

    overlap_area = tf.mul(overlap_flag, tf.mul(
        x2_min - x1_max, y2_min - y1_max))

    # compute union
    areaA = tf.mul(x2A - x1A, y2A - y1A)
    areaB = tf.mul(x2B - x1B, y2B - y1B)
    union_area = areaA + areaB - overlap_area

    return tf.div(overlap_area, union_area)
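As a quick sanity check on the arithmetic above, here is a small NumPy sketch (not part of the original project) that applies the same intersection/union formula to a single hand-picked pair of boxes; boxA and boxB are made-up values:

import numpy as np

boxA = np.array([0.0, 0.0, 2.0, 2.0])   # hypothetical [left, top, right, bottom]
boxB = np.array([1.0, 1.0, 3.0, 3.0])

x1_max, y1_max = max(boxA[0], boxB[0]), max(boxA[1], boxB[1])
x2_min, y2_min = min(boxA[2], boxB[2]), min(boxA[3], boxB[3])
overlap = max(0.0, x2_min - x1_max) * max(0.0, y2_min - y1_max)   # 1.0
areaA = (boxA[2] - boxA[0]) * (boxA[3] - boxA[1])                 # 4.0
areaB = (boxB[2] - boxB[0]) * (boxB[3] - boxB[1])                 # 4.0
iou = overlap / (areaA + areaB - overlap)                         # 1/7 ~= 0.143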
Example #3
def make_example_dict(example_protos, example_weights):
    def parse_examples(example_protos):
        features = {
            "target": tf.FixedLenFeature(shape=[1], dtype=tf.float32, default_value=0),
            "age_indices": tf.VarLenFeature(dtype=tf.int64),
            "age_values": tf.VarLenFeature(dtype=tf.float32),
            "gender_indices": tf.VarLenFeature(dtype=tf.int64),
            "gender_values": tf.VarLenFeature(dtype=tf.float32),
        }
        return tf.parse_example([e.SerializeToString() for e in example_protos], features)

    parsed = parse_examples(example_protos)
    sparse_features = [
        SparseFeatureColumn(
            tf.reshape(tf.split(1, 2, parsed["age_indices"].indices)[0], [-1]),
            tf.reshape(parsed["age_indices"].values, [-1]),
            tf.reshape(parsed["age_values"].values, [-1]),
        ),
        SparseFeatureColumn(
            tf.reshape(tf.split(1, 2, parsed["gender_indices"].indices)[0], [-1]),
            tf.reshape(parsed["gender_indices"].values, [-1]),
            tf.reshape(parsed["gender_values"].values, [-1]),
        ),
    ]
    return dict(
        sparse_features=sparse_features,
        dense_features=[],
        example_weights=example_weights,
        example_labels=tf.reshape(parsed["target"], [-1]),
        example_ids=["%d" % i for i in range(0, len(example_protos))],
    )
Example #4
 def _composition_function(self, inputs, length, init_state=None):
     if self._composition == "GRU":
         cell = GRUCell(self._size)
         return dynamic_rnn(cell, inputs, sequence_length=length, time_major=True,
                            initial_state=init_state, dtype=tf.float32)[0]
     elif self._composition == "LSTM":
         cell = BasicLSTMCell(self._size)
         init_state = tf.concat(1, [tf.zeros_like(init_state, tf.float32), init_state]) if init_state else None
         outs = dynamic_rnn(cell, inputs, sequence_length=length, time_major=True,
                            initial_state=init_state, dtype=tf.float32)[0]
         return outs
     elif self._composition == "BiGRU":
         cell = GRUCell(self._size // 2, self._size)
         init_state_fw, init_state_bw = tf.split(1, 2, init_state) if init_state else (None, None)
         with tf.variable_scope("forward"):
             fw_outs = dynamic_rnn(cell, inputs, sequence_length=length, time_major=True,
                                   initial_state=init_state_fw, dtype=tf.float32)[0]
         with tf.variable_scope("backward"):
             rev_inputs = tf.reverse_sequence(tf.pack(inputs), length, 0, 1)
             rev_inputs = [tf.reshape(x, [-1, self._size]) for x in tf.split(0, len(inputs), rev_inputs)]
             bw_outs = dynamic_rnn(cell, rev_inputs, sequence_length=length, time_major=True,
                                   initial_state=init_state_bw, dtype=tf.float32)[0]
             bw_outs = tf.reverse_sequence(tf.pack(bw_outs), length, 0, 1)
             bw_outs = [tf.reshape(x, [-1, self._size]) for x in tf.split(0, len(inputs), bw_outs)]
         return [tf.concat(1, [fw_out, bw_out]) for fw_out, bw_out in zip(fw_outs, bw_outs)]
     else:
         raise NotImplementedError("Other compositions not implemented yet.")
Example #5
    def build(self):
        """None
        Build the model graph
        :return:
        """
        with tf.name_scope('G_'):
            self.predict_g = self.__G__()
            self.predict_g2 = self.__G2__()

        with tf.name_scope('D_'):

            # Create reference examples
            # input_d holds real & imaginary values; the discriminative decision is based on the reconstructed image
            self.reconstructed_image_reference = self.get_reconstructed_image(real=self.input_d['real'],
                                                                              imag=self.input_d['imag'], name='Both_gt')

            predict_g2_stacked = tf.stack([self.predict_g2['real'][:,0,:,:], self.predict_g2['imag'][:,0,:,:]], axis=1)

            self.predict, self.predict_logits = self.__D__([self.reconstructed_image_reference, predict_g2_stacked])

            self.predict_d, self.predict_d_for_g = tf.split(value=self.predict, num_or_size_splits=2, axis=0)
            self.predict_d_logits, self.predict_d_logits_for_g = tf.split(value=self.predict_logits,
                                                                          num_or_size_splits=2, axis=0)
            self.clip_weights = self.__clip_weights__()

        with tf.name_scope('loss'):
            # self.loss_g = self.__loss_g__(predict=self.predict_g, self.labels, reg=self.regularization_sum)
            self.__loss__()

        with tf.name_scope('training'):
            self.train_op_d, self.train_op_g = self.__training__(learning_rate=self.FLAGS.learning_rate)

        with tf.name_scope('evaluation'):
            # Calculate accuracy L2 norm
            self.evaluation = self.__evaluation__(predict=self.predict_g, labels=self.labels)
Example #6
    def build(self):
        """None
        Build the model graph
        :return:
        """
        with tf.name_scope('G_'):
            self.predict_g = self.__G__()

        with tf.name_scope('D_'):
            self.predict, self.predict_logits = self.__D__([self.input_d, self.predict_g], input_type="Real")

            self.predict_d, self.predict_d_for_g = tf.split(value=self.predict, num_or_size_splits=2, axis=0)
            self.predict_d_logits, self.predict_d_logits_for_g = tf.split(value=self.predict_logits,
                                                                          num_or_size_splits=2, axis=0)

            # self.predict_d, self.predict_d_logits
            # with tf.variable_scope(tf.get_variable_scope(), reuse=True):
            #     self.predict_d_for_g, self.predict_d_logits_for_g = self.__D__(self.predict_g, input_type="Gen")

            if len(self.regularization_values_d) > 0:
                self.regularization_sum_d = sum(self.regularization_values_d)

        with tf.name_scope('loss'):
            # self.loss_g = self.__loss_g__(predict=self.predict_g, self.labels, reg=self.regularization_sum)
            self.__loss__()

        with tf.name_scope('training'):
            self.train_op_d, self.train_op_g = self.__training__(learning_rate=self.FLAGS.learning_rate)

        with tf.name_scope('evaluation'):
            # Calculate accuracy L2 norm
            self.evaluation = self.__evaluation__(predict=self.predict_g, labels=self.labels)
Example #7
        def _g_recurrence_1(i, x_t, input_x, gen_x, h_tm1, h_tm1_manager, last_goal, real_goal, give_num):
            cur_sen = \
            tf.split(tf.concat([tf.split(input_x, [i, self.sequence_length - i], 1)[0], self.padding_array], 1),
                     [self.sequence_length, i], 1)[0]
            with tf.variable_scope(self.scope):
                feature = self.FeatureExtractor_unit(cur_sen, self.drop_out)

            h_t_manager = self.g_manager_recurrent_unit(feature, h_tm1_manager)
            sub_goal = self.g_manager_output_unit(h_t_manager)
            sub_goal = tf.nn.l2_normalize(sub_goal, 1)

            h_t_Worker = tf.cond(i > 0, lambda: self.g_worker_recurrent_unit(x_t, h_tm1),
                                 lambda: h_tm1)  # hidden_memory_tuple

            real_sub_goal = tf.cond(i > 0, lambda: tf.add(last_goal, sub_goal), lambda: real_goal)
            # real_goal_array = real_goal_array.write(i, real_sub_goal)

            x_tp1 = tf.cond(i > 0, lambda: ta_emb_x.read(i - 1), lambda: x_t)

            # hidden_memory_tuple
            with tf.control_dependencies([cur_sen]):
                gen_x = tf.cond(i > 0, lambda: gen_x.write(i - 1, ta_x.read(i - 1)), lambda: gen_x)
            return i + 1, x_tp1, input_x, gen_x, h_t_Worker, h_t_manager, \
                   tf.cond(((i) % self.step_size) > 0, lambda: real_sub_goal,
                           lambda: tf.constant(0.0, shape=[self.batch_size, self.goal_out_size])), \
                   tf.cond(((i) % self.step_size) > 0, lambda: real_goal, lambda: real_sub_goal), give_num
Example #8
 def testSymbolModalityTargetsFactored(self):
   batch_size = 10
   num_datashards = 5
   length = 6
   height = 7
   hidden_size = 9
   vocab_size = 11
   model_hparams = common_hparams.basic_params1()
   model_hparams.factored_logits = True
   model_hparams.hidden_size = hidden_size
   model_hparams.mode = tf.estimator.ModeKeys.TRAIN
   body_output = -1 + np.random.random_integers(
       100, size=(batch_size, length, height, hidden_size))
   targets = -1 + np.random.random_integers(
       vocab_size, size=(batch_size, length, height, 1))
   m = modalities.SymbolModality(model_hparams, vocab_size)
   data_parallelism = expert_utils.Parallelism(
       ["/device:CPU:0"] * num_datashards)
   with self.test_session() as session:
     sharded_body_output = tf.split(tf.to_float(body_output), num_datashards)
     sharded_targets = tf.split(targets, num_datashards)
     sharded_logits = m.top_sharded(sharded_body_output, sharded_targets,
                                    data_parallelism)
     train_loss = m.loss_sharded(sharded_logits, sharded_targets,
                                 data_parallelism)
     logits = tf.concat(sharded_logits, 0)
     session.run(tf.global_variables_initializer())
     res1, res2 = session.run((logits, train_loss))
   self.assertEqual(res1.shape, (batch_size, length, height, 1, vocab_size))
   self.assertEqual(res2.shape, ())
Example #9
  def test_backward_grads_with_nativepy(self):
    if not tf.test.is_gpu_available():
      self.skipTest("GPU not available")

    input_shape = (128, 8, 8)
    data_shape = (16,) + input_shape
    x = tf.random_normal(shape=data_shape, dtype=tf.float64)
    dy = tf.random_normal(shape=data_shape, dtype=tf.float64)
    dy1, dy2 = tf.split(dy, num_or_size_splits=2, axis=1)
    block = blocks.RevBlock(
        n_res=3,
        filters=128,
        strides=(1, 1),
        input_shape=input_shape,
        fused=False,
        dtype=tf.float64)
    with tf.GradientTape() as tape:
      tape.watch(x)
      x1, x2 = tf.split(x, num_or_size_splits=2, axis=1)
      y1, y2 = block((x1, x2), training=True)
      y = tf.concat((y1, y2), axis=1)

    # Compute true grads
    dx_true = tape.gradient(y, x, output_gradients=dy)

    # Compute grads from reconstruction
    (dx1, dx2), _ = block.backward_grads(
        x=(x1, x2), y=(y1, y2), dy=(dy1, dy2), training=True)
    dx = tf.concat((dx1, dx2), axis=1)

    thres = 1e-5
    diff_abs = tf.reshape(abs(dx - dx_true), [-1])
    assert all(diff_abs < thres)
Example #10
  def _model_fn(features, labels, mode, params):
    model_fn = MODELS[FLAGS.model].model_fn

    global_step = tf.train.get_or_create_global_step()

    if FLAGS.num_gpus > 0 and mode == learn.ModeKeys.TRAIN:
      split_features = {k: tf.split(v, FLAGS.num_gpus)
                        for k, v in features.iteritems()}
      split_labels = {k: tf.split(v, FLAGS.num_gpus)
                      for k, v in labels.iteritems()}
      grads = []
      predictions = collections.defaultdict(list)
      losses = []

      opt = ops.create_optimizer(
        params.optimizer, params.learning_rate, params.decay_steps)

      for i in range(FLAGS.num_gpus):
        with tf.device(tf.DeviceSpec(device_type='GPU', device_index=i)):
          with tf.name_scope('tower_%d' % i):
            with tf.variable_scope(tf.get_variable_scope(), reuse=i > 0):
              device_features = {k: v[i] for k, v in split_features.iteritems()}
              device_labels = {k: v[i] for k, v in split_labels.iteritems()}

              device_predictions, device_loss = model_fn(
                device_features, device_labels, mode, params)

              for k, v in device_predictions.iteritems():
                predictions[k].append(v)

              if device_loss is not None:
                losses.append(device_loss)

              device_grads = opt.compute_gradients(device_loss)
              grads.append(device_grads)

      grads = ops.average_gradients(grads)
      train_op = opt.apply_gradients(grads, global_step=global_step)

      for k, v in predictions.iteritems():
        predictions[k] = tf.concat(v, axis=0)

      loss = tf.add_n(losses) if losses else None
    else:
      with tf.device(tf.DeviceSpec(device_type='GPU', device_index=0)):
        predictions, loss = model_fn(features, labels, mode, params)

        train_op = None
        if mode == learn.ModeKeys.TRAIN:
          opt = ops.create_optimizer(
            params.optimizer, params.learning_rate, params.decay_steps)
          train_op = opt.minimize(loss, global_step=global_step)

    tf.summary.scalar('loss/loss', loss)

    return tf.contrib.learn.ModelFnOps(
      mode=mode,
      predictions=predictions,
      loss=loss,
      train_op=train_op)
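_model_fn above depends on a helper ops.average_gradients that is not included in this snippet. A common sketch of such a helper (an assumption about its behaviour, not the project's actual code) averages each variable's gradient across the per-tower lists returned by compute_gradients:

import tensorflow as tf

def average_gradients(tower_grads):
    # tower_grads: one list of (gradient, variable) pairs per tower/GPU.
    # Returns a single list with each variable's gradient averaged over the towers.
    averaged = []
    for grads_and_vars in zip(*tower_grads):
        grads = [g for g, _ in grads_and_vars if g is not None]
        grad = tf.reduce_mean(tf.stack(grads), axis=0) if grads else None
        averaged.append((grad, grads_and_vars[0][1]))
    return averaged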
Example #11
  def backward_grads(self, y, dy, training=True):
    """Manually compute backward gradients given input and output grads."""
    dy1, dy2 = tf.split(dy, num_or_size_splits=2, axis=self.axis)
    y1, y2 = tf.split(y, num_or_size_splits=2, axis=self.axis)

    with tf.GradientTape() as gtape:
      gtape.watch(y1)
      gy1 = self.g(y1, training=training)
    grads_combined = gtape.gradient(
        gy1, [y1] + self.g.trainable_variables, output_gradients=dy2)
    dg = grads_combined[1:]
    dx1 = dy1 + grads_combined[0]
    # This doesn't affect eager execution, but improves memory efficiency with
    # graphs
    with tf.control_dependencies(dg + [dx1]):
      x2 = y2 - gy1

    with tf.GradientTape() as ftape:
      ftape.watch(x2)
      fx2 = self.f(x2, training=training)
    grads_combined = ftape.gradient(
        fx2, [x2] + self.f.trainable_variables, output_gradients=dx1)
    df = grads_combined[1:]
    dx2 = dy2 + grads_combined[0]
    # Same behavior as above
    with tf.control_dependencies(df + [dx2]):
      x1 = y1 - fx2

    x = tf.concat([x1, x2], axis=self.axis)
    dx = tf.concat([dx1, dx2], axis=self.axis)
    grads = df + dg

    return x, dx, grads
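backward_grads above reconstructs the block inputs as x2 = y2 - g(y1) and x1 = y1 - f(x2), i.e. it inverts the additive coupling of a reversible (RevNet-style) block. A minimal sketch of the forward pass being inverted, with f and g standing in for self.f and self.g, is:

def reversible_forward(x1, x2, f, g):
    # Additive coupling assumed by backward_grads: given the two halves of the input,
    # produce the two halves of the output. Because the coupling is invertible, the
    # backward pass can recompute x1 and x2 from y1 and y2 instead of storing them.
    y1 = x1 + f(x2)
    y2 = x2 + g(y1)
    return y1, y2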
Example #12
  def infer(self, features, *args, **kwargs):  # pylint: disable=arguments-differ
    """Produce predictions from the model."""
    del args, kwargs
    # Inputs and features preparation needed to handle edge cases.
    if not features:
      features = {}
    inputs_old = None
    if "inputs" in features and len(features["inputs"].shape) < 4:
      inputs_old = features["inputs"]
      features["inputs"] = tf.expand_dims(features["inputs"], 2)

    # Set targets to input size first.
    features["targets"] = tf.zeros_like(features["inputs"])
    self._encode_on_predict = True
    logits, _ = self(features)  # pylint: disable=not-callable
    if self.hparams.gan_loss_factor != 0:
      logits, _ = tf.split(logits, 2, axis=0)  # Remove GAN.
    logits, _ = tf.split(logits, 2, axis=0)  # Targets and inputs from encoding.
    # Uncomment the line below to get reconstructed inputs instead of targets.
    # (and comment out the line above at the same time).
    # _, logits = tf.split(logits, 2, axis=0)
    samples = tf.argmax(logits, axis=-1)

    # Restore inputs to not confuse Estimator in edge cases.
    if inputs_old is not None:
      features["inputs"] = inputs_old

    # Return samples.
    return samples
Example #13
  def call(self, x, mask=None):
    """Execute this layer on input tensors.
    
    x = [atom_features, atom_mask]
    
    Parameters
    ----------
    x: list
      Tensors as listed above
    mask: bool, optional
      Ignored. Present only to shadow superclass call() method.

    Returns
    -------
    outputs: Tensor
      Tensor of concatenated atom features
    """
    self.build()
    atom_features = x[0]
    atom_masks = x[1]
    A = tf.split(atom_features, self.batch_size, axis=0)
    A_mask = tf.split(
        tf.cast(atom_masks, dtype=tf.bool), self.batch_size, axis=0)
    outputs = tf.concat(
        [tf.boolean_mask(A[i], A_mask[i]) for i in range(len(A))], axis=0)
    outputs = tf.matmul(outputs, self.W) + self.b
    outputs = self.activation(outputs)
    return outputs
Example #14
def lnlstm(xs, ms, s, scope, nh, init_scale=1.0):
    nbatch, nin = [v.value for v in xs[0].get_shape()]
    with tf.variable_scope(scope):
        wx = tf.get_variable("wx", [nin, nh*4], initializer=ortho_init(init_scale))
        gx = tf.get_variable("gx", [nh*4], initializer=tf.constant_initializer(1.0))
        bx = tf.get_variable("bx", [nh*4], initializer=tf.constant_initializer(0.0))

        wh = tf.get_variable("wh", [nh, nh*4], initializer=ortho_init(init_scale))
        gh = tf.get_variable("gh", [nh*4], initializer=tf.constant_initializer(1.0))
        bh = tf.get_variable("bh", [nh*4], initializer=tf.constant_initializer(0.0))

        b = tf.get_variable("b", [nh*4], initializer=tf.constant_initializer(0.0))

        gc = tf.get_variable("gc", [nh], initializer=tf.constant_initializer(1.0))
        bc = tf.get_variable("bc", [nh], initializer=tf.constant_initializer(0.0))

    c, h = tf.split(axis=1, num_or_size_splits=2, value=s)
    for idx, (x, m) in enumerate(zip(xs, ms)):
        c = c*(1-m)
        h = h*(1-m)
        z = _ln(tf.matmul(x, wx), gx, bx) + _ln(tf.matmul(h, wh), gh, bh) + b
        i, f, o, u = tf.split(axis=1, num_or_size_splits=4, value=z)
        i = tf.nn.sigmoid(i)
        f = tf.nn.sigmoid(f)
        o = tf.nn.sigmoid(o)
        u = tf.tanh(u)
        c = f*c + i*u
        h = o*tf.tanh(_ln(c, gc, bc))
        xs[idx] = h
    s = tf.concat(axis=1, values=[c, h])
    return xs, s
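lnlstm calls a helper _ln that is not shown in this snippet. A plausible sketch, assuming it applies layer normalization over the feature axis followed by the learned gain g and bias b, is:

import tensorflow as tf

def _ln(x, g, b, e=1e-5, axes=None):
    # Assumed layer-norm helper: normalize each row to zero mean / unit variance
    # over the feature axis, then rescale with gain g and shift with bias b.
    axes = [1] if axes is None else axes
    u = tf.reduce_mean(x, axis=axes, keepdims=True)
    s = tf.reduce_mean(tf.square(x - u), axis=axes, keepdims=True)
    return (x - u) / tf.sqrt(s + e) * g + b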
Example #15
 def add_training_loss(self, final_loss, logits):
   """Computes loss using logits."""
   loss_fn = get_loss_fn(final_loss)  # Get loss function
   task_losses = []
   # label_placeholder of shape (batch_size, n_tasks). Split into n_tasks
   # tensors of shape (batch_size,)
   task_labels = tf.split(
       axis=1, num_or_size_splits=self.n_tasks, value=self.label_placeholder)
   task_weights = tf.split(
       axis=1, num_or_size_splits=self.n_tasks, value=self.weight_placeholder)
   for task in range(self.n_tasks):
     task_label_vector = task_labels[task]
     task_weight_vector = task_weights[task]
     # Convert the labels into one-hot vector encodings.
     one_hot_labels = tf.to_float(
         tf.one_hot(tf.to_int32(tf.squeeze(task_label_vector)), 2))
     # Since we use tf.nn.softmax_cross_entropy_with_logits note that we pass in
     # un-softmaxed logits rather than softmax outputs.
     task_loss = loss_fn(logits[task], one_hot_labels, task_weight_vector)
     task_losses.append(task_loss)
   # It's ok to divide by just the batch_size rather than the number of nonzero
   # examples (effect averages out)
   total_loss = tf.add_n(task_losses)
   total_loss = tf.div(total_loss, self.batch_size)
   return total_loss
Example #16
def remove_channels(x, data_format='NHWC'):
    b, h, w, c = get_conv_shape(x, data_format)
    if data_format == 'NCHW':
        x, _ = tf.split(x, [3, -1], axis=1)
    else:
        x, _ = tf.split(x, [3, -1], axis=3)
    return x
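A minimal usage sketch for remove_channels (made-up shape; it assumes get_conv_shape returns the [batch, height, width, channels] layout of the tensor): the split keeps the first three channels and discards the rest.

x = tf.zeros([8, 32, 32, 5])   # hypothetical NHWC input with 5 channels
rgb = remove_channels(x)       # -> shape [8, 32, 32, 3]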
Example #17
 def testSymbolModalityTargets(self):
   batch_size = 10
   num_datashards = 5
   length = 6
   height = 7
   hidden_size = 9
   vocab_size = 11
   model_hparams = tf.contrib.training.HParams(
       symbol_modality_num_shards=4,
       hidden_size=hidden_size,
       label_smoothing=0.2,
       shared_embedding_and_softmax_weights=0)
   body_output = -1 + np.random.random_integers(
       100, size=(batch_size, length, height, hidden_size))
   targets = -1 + np.random.random_integers(
       vocab_size, size=(batch_size, length, height, 1))
   m = modalities.SymbolModality(model_hparams, vocab_size)
   data_parallelism = expert_utils.Parallelism(
       ["/device:CPU:0"] * num_datashards, reuse=True)
   with self.test_session() as session:
     sharded_body_output = tf.split(tf.to_float(body_output), num_datashards)
     sharded_targets = tf.split(targets, num_datashards)
     sharded_logits, train_loss = m.top_sharded(
         sharded_body_output, sharded_targets, data_parallelism)
     logits = tf.concat(sharded_logits, 0)
     session.run(tf.global_variables_initializer())
     res1, res2 = session.run((logits, train_loss))
   self.assertEqual(res1.shape, (batch_size, length, height, 1, vocab_size))
   self.assertEqual(res2.shape, ())
Example #18
File: rnn.py  Project: ThierryGrb/magenta
  def __call__(self, x, state, scope=None):
    with tf.variable_scope(scope or type(self).__name__):
      c, h = tf.split(state, 2, 1)

      x_size = x.get_shape().as_list()[1]

      w_init = None  # uniform

      h_init = lstm_ortho_initializer(1.0)

      # Keep W_xh and W_hh separate here as well to use different init methods.
      w_xh = tf.get_variable(
          'W_xh', [x_size, 4 * self.num_units], initializer=w_init)
      w_hh = tf.get_variable(
          'W_hh', [self.num_units, 4 * self.num_units], initializer=h_init)
      bias = tf.get_variable(
          'bias', [4 * self.num_units],
          initializer=tf.constant_initializer(0.0))

      concat = tf.concat([x, h], 1)
      w_full = tf.concat([w_xh, w_hh], 0)
      hidden = tf.matmul(concat, w_full) + bias

      i, j, f, o = tf.split(hidden, 4, 1)

      if self.use_recurrent_dropout:
        g = tf.nn.dropout(tf.tanh(j), self.dropout_keep_prob)
      else:
        g = tf.tanh(j)

      new_c = c * tf.sigmoid(f + self.forget_bias) + tf.sigmoid(i) * g
      new_h = tf.tanh(new_c) * tf.sigmoid(o)

      return new_h, tf.concat([new_c, new_h], 1)  # return the state as one concatenated tensor rather than a tuple.
Example #19
    def forward_backward(self, obs_prob_seq):
        """
        runs forward backward algorithm on observation sequence

        Arguments
        ---------
        - obs_seq : matrix of size N by S, where N is number of timesteps and
            S is the number of states

        Returns
        -------
        - forward : matrix of size N by S representing
            the forward probability of each state at each time step
        - backward : matrix of size N by S representing
            the backward probability of each state at each time step
        - posterior : matrix of size N by S representing
            the posterior probability of each state at each time step
        """
        obs_prob_list_for = tf.split(0, self.N, obs_prob_seq)
        
        with tf.name_scope('forward_belief_propagation'):
            # forward belief propagation
            self._forward(obs_prob_list_for)

        obs_prob_seq_rev = tf.reverse(obs_prob_seq, [True, False])
        obs_prob_list_back = tf.split(0, self.N, obs_prob_seq_rev)

        with tf.name_scope('backward_belief_propagation'):
            # backward belief propagation
            self._backward(obs_prob_list_back)
Example #20
 def minibatch(self, dataset, subset, use_datasets, cache_data,
               shift_ratio=-1):
     """Get synthetic image batches.
     """
     del subset, use_datasets, cache_data, shift_ratio
     input_shape = [self.batch_size, self.height, self.width, self.depth]
     images = tf.truncated_normal(
         input_shape,
         dtype=self.dtype,
         stddev=1e-1,
         name='synthetic_images')
     labels = tf.random_uniform(
         [self.batch_size],
         minval=0,
         maxval=dataset.num_classes - 1,
         dtype=tf.int32,
         name='synthetic_labels')
     # Note: This results in a H2D copy, but no computation
     # Note: This avoids recomputation of the random values, but still
     #         results in a H2D copy.
     images = tf.contrib.framework.local_variable(images, name='images')
     labels = tf.contrib.framework.local_variable(labels, name='labels')
     if self.num_splits == 1:
         images_splits = [images]
         labels_splits = [labels]
     else:
         images_splits = tf.split(images, self.num_splits, 0)
         labels_splits = tf.split(labels, self.num_splits, 0)
     return images_splits, labels_splits
Example #21
File: rnn.py  Project: ThierryGrb/magenta
  def __call__(self, x, state, timestep=0, scope=None):
    with tf.variable_scope(scope or type(self).__name__):
      h, c = tf.split(state, 2, 1)

      h_size = self.num_units
      x_size = x.get_shape().as_list()[1]
      batch_size = x.get_shape().as_list()[0]

      w_init = None  # uniform

      h_init = lstm_ortho_initializer(1.0)

      w_xh = tf.get_variable(
          'W_xh', [x_size, 4 * self.num_units], initializer=w_init)
      w_hh = tf.get_variable(
          'W_hh', [self.num_units, 4 * self.num_units], initializer=h_init)

      concat = tf.concat([x, h], 1)  # concat for speed.
      w_full = tf.concat([w_xh, w_hh], 0)
      concat = tf.matmul(concat, w_full)  # bias term intentionally omitted

      # i = input_gate, j = new_input, f = forget_gate, o = output_gate
      concat = layer_norm_all(concat, batch_size, 4, h_size, 'ln_all')
      i, j, f, o = tf.split(concat, 4, 1)

      if self.use_recurrent_dropout:
        g = tf.nn.dropout(tf.tanh(j), self.dropout_keep_prob)
      else:
        g = tf.tanh(j)

      new_c = c * tf.sigmoid(f + self.forget_bias) + tf.sigmoid(i) * g
      new_h = tf.tanh(layer_norm(new_c, h_size, 'ln_c')) * tf.sigmoid(o)

    return new_h, tf.concat([new_h, new_c], 1)
Example #22
 def _read(self, keys, redundant_states):
     read = _comp_mul(keys, redundant_states)
     if self._num_copies > 1:
         xs_real = tf.split(1, self._num_copies, _comp_real(read))
         xs_imag = tf.split(1, self._num_copies, _comp_imag(read))
         read = (tf.add_n(xs_real)/self._num_copies, tf.add_n(xs_imag)/self._num_copies)
     return read
Example #23
def decode_bbox_target(box_predictions, anchors):
    """
    Args:
        box_predictions: (..., 4), logits
        anchors: (..., 4), floatbox. Must have the same shape

    Returns:
        box_decoded: (..., 4), float32. With the same shape.
    """
    orig_shape = tf.shape(anchors)
    box_pred_txtytwth = tf.reshape(box_predictions, (-1, 2, 2))
    box_pred_txty, box_pred_twth = tf.split(box_pred_txtytwth, 2, axis=1)
    # each is (...)x1x2
    anchors_x1y1x2y2 = tf.reshape(anchors, (-1, 2, 2))
    anchors_x1y1, anchors_x2y2 = tf.split(anchors_x1y1x2y2, 2, axis=1)

    waha = anchors_x2y2 - anchors_x1y1
    xaya = (anchors_x2y2 + anchors_x1y1) * 0.5

    clip = np.log(config.PREPROC.MAX_SIZE / 16.)
    wbhb = tf.exp(tf.minimum(box_pred_twth, clip)) * waha
    xbyb = box_pred_txty * waha + xaya
    x1y1 = xbyb - wbhb * 0.5
    x2y2 = xbyb + wbhb * 0.5    # (...)x1x2
    out = tf.concat([x1y1, x2y2], axis=-2)
    return tf.reshape(out, orig_shape)
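To make the transformation concrete, here is a small NumPy sketch (not from the original project) of the same decoding applied to a single anchor with made-up numbers; the log-space clipping step is omitted for brevity:

import numpy as np

anchor = np.array([10.0, 10.0, 30.0, 50.0])   # hypothetical [x1, y1, x2, y2]
pred = np.array([0.1, -0.2, 0.0, 0.5])        # hypothetical [tx, ty, tw, th] logits

wa, ha = anchor[2] - anchor[0], anchor[3] - anchor[1]              # 20, 40
xa, ya = (anchor[0] + anchor[2]) / 2, (anchor[1] + anchor[3]) / 2  # 20, 30
wb, hb = np.exp(pred[2]) * wa, np.exp(pred[3]) * ha                # 20, ~65.9
xb, yb = pred[0] * wa + xa, pred[1] * ha + ya                      # 22, 22
decoded = [xb - wb / 2, yb - hb / 2, xb + wb / 2, yb + hb / 2]     # ~[12.0, -11.0, 32.0, 55.0]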
Example #24
    def __call__(self, inputs, state, scope=None):
        """Long short-term memory cell (LSTM)."""
        with tf.variable_scope(scope or "basic_lstm_cell", reuse=self._reuse):
            # Parameters of gates are concatenated into one multiply for
            # efficiency.
            if self._state_is_tuple:
                c_prev, h_prev = state
            else:
                c_prev, h_prev = tf.split(
                    value=state, num_or_size_splits=2, axis=1)
            concat = tf.contrib.rnn._linear(
                [inputs, h_prev], 4 * self._num_units, True)

            # i = input_gate, g = new_input, f = forget_gate, o = output_gate
            i, g, f, o = tf.split(value=concat, num_or_size_splits=4, axis=1)

            c = (c_prev * tf.sigmoid(f + self._forget_bias) +
                 tf.sigmoid(i) * tf.tanh(g))
            h = tf.tanh(c) * tf.sigmoid(o)

            if self._state_is_tuple:
                new_state = LSTMStateTuple(c, h)
            else:
                new_state = tf.concat([c, h], 1)
            return h, new_state
Example #25
 def conv(self,
          input,
          k_h,
          k_w,
          c_o,
          s_h,
          s_w,
          name,
          relu=True,
          padding=DEFAULT_PADDING,
          group=1):
     self.validate_padding(padding)
     c_i = input.get_shape()[-1]
     assert c_i % group == 0
     assert c_o % group == 0
     convolve = lambda i, k: tf.nn.conv2d(i, k, [1, s_h, s_w, 1], padding=padding)
     with tf.variable_scope(name) as scope:
         kernel = self.make_var('weights', shape=[k_h, k_w, c_i // group, c_o])
         biases = self.make_var('biases', [c_o])
         if group == 1:
             conv = convolve(input, kernel)
         else:
             input_groups = tf.split(3, group, input)
             kernel_groups = tf.split(3, group, kernel)
             output_groups = [convolve(i, k) for i, k in zip(input_groups, kernel_groups)]
             conv = tf.concat(3, output_groups)
         if relu:
             bias = tf.reshape(tf.nn.bias_add(conv, biases), conv.get_shape().as_list())
             return tf.nn.relu(bias, name=scope.name)
         return tf.reshape(
             tf.nn.bias_add(conv, biases),
             conv.get_shape().as_list(),
             name=scope.name)
Example #26
    def build_network(self):
        net_tensors = self.net_tensors
        with self.net_graph.as_default(), tf.device(self.net_device):
            logits = tf.placeholder(dtype=tf.float32, shape=(self.batch_size, self.image_classes))
            labels = tf.placeholder(dtype=tf.int32, shape=(self.batch_size,))
            lambs = tf.placeholder(dtype=tf.float32, shape=(self.image_classes,))
            # apply sig_func to the logits and then transpose
            logits = tf.transpose(framwork.sig_func(logits))
            # according to the labels, erase rows that are not in the labels

            labels_unique = tf.constant(range(self.image_classes), dtype=tf.int32)
            labels_num = self.image_classes
            logits = tf.gather(logits, indices=labels_unique)
            lambs = tf.gather(lambs, indices=labels_unique)
            # set the value of each row to True when it occurs in labels
            templete = tf.tile(tf.expand_dims(labels_unique, dim=1), [1, self.batch_size])
            labels_expand = tf.tile(tf.expand_dims(labels, dim=0), [labels_num, 1])
            indict_logic = tf.equal(labels_expand, templete)
            # split the tensor along rows
            logit_list = tf.split(0, labels_num, logits)
            indict_logic_list = tf.split(0, labels_num, indict_logic)
            lamb_list = tf.split(0, self.image_classes, lambs)
            logit_list = [tf.squeeze(item) for item in logit_list]
            indict_logic_list = [tf.squeeze(item) for item in indict_logic_list]
            left_right_tuples = list()
            for i in range(self.image_classes):
                left_right_tuples.append(framwork.lamb_func(logit_list[i], indict_logic_list[i], lamb=lamb_list[i]))
            # func = framwork.lamb_func()
            # left_right_tuples = map(func, logit_list, indict_logic_list, lamb_list)
            net_tensors.update({'left_right_tuples': left_right_tuples, 'logits': logits, 'labels': labels,
                                'lambs': lambs})
Example #27
 def build_loss(self, logits, labels, lambs):
     # apply sig_func to the logits and then transpose
     logits = tf.transpose(framwork.sig_func(logits))
     # according to the labels, erase rows that are not in the labels
     labels_unique = tf.constant(range(self.image_classes), dtype=tf.int32)
     labels_num = self.image_classes
     logits = tf.gather(logits, indices=labels_unique)
     lambs = tf.gather(lambs, indices=labels_unique)
     # set the value of each row to True when it occurs in labels
     template = tf.tile(tf.expand_dims(labels_unique, dim=1), [1, self.batch_size])
     labels_expand = tf.tile(tf.expand_dims(labels, dim=0), [labels_num, 1])
     indict_logic = tf.equal(labels_expand, template)
     # split the tensor along rows
     logit_list = tf.split(0, labels_num, logits)
     indict_logic_list = tf.split(0, labels_num, indict_logic)
     lambda_list = tf.split(0, self.image_classes, lambs)
     # loss_list = list()
     # for i in range(self.image_classes):
     #     loss_list.append(framwork.loss_func(logit_list[i], indict_logic_list[i], lambda_list[i]))
     loss_list = map(framwork.loss_func, logit_list, indict_logic_list, lambda_list)
     loss = tf.add_n(loss_list)
     tensors_dict = {'labels_unique': labels_unique, 'template': template, 'logits_sig_trans': logits,
                     'loss': loss, 'indict_logic': indict_logic}
     self.tensors_names.extend(tensors_dict.keys())
     self.net_tensors.update(tensors_dict)
Example #28
  def call(self, x, h):
    channels = x.shape[self._feature_axis].value

    with tf.variable_scope('gates'):
      inputs = tf.concat([x, h], axis=self._feature_axis)
      n = channels + self._filters
      m = 2 * self._filters if self._filters > 1 else 2
      W = tf.get_variable('kernel', self._kernel + [n, m])
      y = tf.nn.convolution(inputs, W, 'SAME', data_format=self._data_format)
      if self._normalize:
        r, u = tf.split(y, 2, axis=self._feature_axis)
        r = tf.contrib.layers.layer_norm(r)
        u = tf.contrib.layers.layer_norm(u)
      else:
        y += tf.get_variable('bias', [m], initializer=tf.ones_initializer())
        r, u = tf.split(y, 2, axis=self._feature_axis)
      r, u = tf.sigmoid(r), tf.sigmoid(u)

      # TODO
      #tf.summary.histogram('reset_gate', r)
      #tf.summary.histogram('update_gate', u)

    with tf.variable_scope('candidate'):
      inputs = tf.concat([x, r * h], axis=self._feature_axis)
      n = channels + self._filters
      m = self._filters
      W = tf.get_variable('kernel', self._kernel + [n, m])
      y = tf.nn.convolution(inputs, W, 'SAME', data_format=self._data_format)
      if self._normalize:
        y = tf.contrib.layers.layer_norm(y)
      else:
        y += tf.get_variable('bias', [m], initializer=tf.zeros_initializer())
      h = u * h + (1 - u) * self._activation(y)

    return h, h
Example #29
  def __call__(self, inputs, state, scope=None):
    """Long short-term memory cell (LSTM)."""
    with tf.variable_scope(scope or type(self).__name__):  # "BasicLSTMCell"
      # Parameters of gates are concatenated into one multiply for efficiency.
      c, h = tf.split(1, 2, state)
      concat = linear.linear([inputs, h], 4 * self._num_units, True)

      fs = []

      # This can be made more efficient since we're doing more than needs to be
      # done, but for now w/e
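      # NOTE: child_states is assumed to be supplied by the surrounding tree construction;
      # it is not defined anywhere in this snippet.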
      for child_state in child_states:
          c_k, h_k = tf.split(1, 2, child_state)
          concat_k = linear.linear([inputs, h_k], 4 * self._num_units, True)
          i_k, j_k, f_k, o_k = tf.split(1, 4, concat_k)
          fs.append(f_k)


      # i = input_gate, j = new_input, f = forget_gate, o = output_gate
      # TODO: forget gate for each child, probably need to split by number
      # of child states or something
      i, j, f, o = tf.split(1, 4, concat)

      # If no children just treat it like a regular lstm
      if not fs:
        fs.append(f)

      new_c = sum(c * tf.sigmoid(f_k + self._forget_bias) for f_k in fs) + tf.sigmoid(i) * tf.tanh(j)
      new_h = tf.tanh(new_c) * tf.sigmoid(o)

    return new_h, tf.concat(1, [new_c, new_h])
Example #30
    def build_loss(self, out, out_tensor):
        """Build a loss function and accuracy for the model."""
        print('  Building loss and accuracy')

        with tf.variable_scope('accuracy'):
            argmax = tf.to_int32(tf.argmax(out_tensor, 2))
            correct = tf.to_float(tf.equal(argmax, self.ts)) * self.t_mask
            accuracy = tf.reduce_sum(correct) / tf.reduce_sum(self.t_mask)

        with tf.variable_scope('loss'):
            with tf.variable_scope('split_t_and_mask'):
                split_kwargs = { 'split_dim': 1,
                                 'num_split': self.max_t_seq_len }
                ts     = tf.split(value=self.ts,     **split_kwargs)
                t_mask = tf.split(value=self.t_mask, **split_kwargs)
                t_mask = [tf.squeeze(weight) for weight in t_mask]

            loss = seq2seq.sequence_loss(out, ts, t_mask,
                                         self.max_t_seq_len)

            with tf.variable_scope('regularization'):
                regularize = tf.contrib.layers.l2_regularizer(self.reg_scale)
                params = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)
                reg_term = sum([regularize(param) for param in params])

            loss += reg_term

        return loss, accuracy
Example #31
    def _build_base_rnn(self, inputs, input_seq_lengths, forward_only=True):
        """
        Build the Language RNN

        Parameters
        ----------
        :param inputs: inputs to the RNN
        :param input_seq_lengths: vector containing the length of each input from 'inputs'
        :param forward_only: whether the RNN is built for inference only (if False, a dropout layer is added for training)

        Returns
        ----------
        :returns logits: each char probability for each timestep of the input, for each item of the batch
        :returns prediction: the best prediction for the input
        :returns rnn_keep_state_op: a tensorflow op to save the RNN internal state for the next batch
        :returns rnn_state_zero_op: a tensorflow op to reset the RNN internal state to zeros
        :returns input_keep_prob_ph: a placeholder for input_keep_prob of the dropout layer
                                     (None if forward_only is True)
        :returns output_keep_prob_ph: a placeholder for output_keep_prob of the dropout layer
                                      (None if forward_only is True)
        :returns rnn_tuple_state: the RNN internal state
        """
        # Define a variable to keep track of the learning process step
        global_step = tf.Variable(0, trainable=False, name='global_step')

        # If building the RNN for training then create dropout rate placeholders
        input_keep_prob_ph = output_keep_prob_ph = None
        if not forward_only:
            with tf.name_scope('dropout'):
                # Create placeholders, used to override values when running on the test set
                input_keep_prob_ph = tf.placeholder(tf.float32)
                output_keep_prob_ph = tf.placeholder(tf.float32)

        # Define cells of language model
        with tf.variable_scope('LSTM'):
            # Create each layer
            layers_list = []
            for _ in range(self.num_layers):
                cell = tf.contrib.rnn.BasicLSTMCell(self.hidden_size,
                                                    state_is_tuple=True)

                # If building the RNN for training then add a dropoutWrapper to the cells
                if not forward_only:
                    with tf.name_scope('dropout'):
                        cell = tf.contrib.rnn.DropoutWrapper(
                            cell,
                            input_keep_prob=input_keep_prob_ph,
                            output_keep_prob=output_keep_prob_ph)
                layers_list.append(cell)

            # Store the layers in a multi-layer RNN
            cell = tf.contrib.rnn.MultiRNNCell(layers_list,
                                               state_is_tuple=True)

        # Build the input layer between input and the RNN
        with tf.variable_scope('Input_Layer'):
            w_i = tf.get_variable(
                "input_w", [self.input_dim, self.hidden_size],
                tf.float32,
                initializer=tf.contrib.layers.xavier_initializer())
            b_i = tf.get_variable("input_b", [self.hidden_size],
                                  tf.float32,
                                  initializer=tf.constant_initializer(0.0))

        # Apply the input layer to the network input to produce the input for the rnn part of the network
        rnn_inputs = [
            tf.matmul(tf.squeeze(tf.cast(i, tf.float32), axis=[0]), w_i) + b_i
            for i in tf.split(axis=0,
                              num_or_size_splits=self.max_input_seq_length,
                              value=inputs)
        ]
        # Switch from a list to a tensor
        rnn_inputs = tf.stack(rnn_inputs)

        # Define some variables to store the RNN state
        # Note : TensorFlow keeps the state within a batch, but these variables are needed to carry the state
        #        between batches, which matters especially when doing live transcription.
        #        Another way would have been to fetch the state as an output of the session and feed it back every time,
        #        but keeping it in variables like this is much more efficient.
        with tf.variable_scope('Hidden_state'):
            state_variables = []
            for state_c, state_h in cell.zero_state(self.batch_size,
                                                    tf.float32):
                state_variables.append(
                    tf.nn.rnn_cell.LSTMStateTuple(
                        tf.Variable(state_c, trainable=False),
                        tf.Variable(state_h, trainable=False)))
            # Return as a tuple, so that it can be fed to dynamic_rnn as an initial state
            rnn_tuple_state = tuple(state_variables)

        # Build the RNN
        with tf.name_scope('LSTM'):
            rnn_output, new_states = tf.nn.dynamic_rnn(
                cell,
                rnn_inputs,
                sequence_length=input_seq_lengths,
                initial_state=rnn_tuple_state,
                time_major=True)

        # Define an op to keep the hidden state between batches
        update_ops = []
        for state_variable, new_state in zip(rnn_tuple_state, new_states):
            # Assign the new state to the state variables on this layer
            update_ops.extend([
                state_variable[0].assign(new_state[0]),
                state_variable[1].assign(new_state[1])
            ])
        # Return a tuple in order to combine all update_ops into a single operation.
        # The tuple's actual value should not be used.
        rnn_keep_state_op = tf.tuple(update_ops)

        # Define an op to reset the hidden state to zeros
        update_ops = []
        for state_variable in rnn_tuple_state:
            # Assign the new state to the state variables on this layer
            update_ops.extend([
                state_variable[0].assign(tf.zeros_like(state_variable[0])),
                state_variable[1].assign(tf.zeros_like(state_variable[1]))
            ])
        # Return a tuple in order to combine all update_ops into a single operation.
        # The tuple's actual value should not be used.
        rnn_state_zero_op = tf.tuple(update_ops)

        # Build the output layer between the RNN and the char_map
        with tf.variable_scope('Output_layer'):
            w_o = tf.get_variable(
                "output_w", [self.hidden_size, self.num_labels],
                tf.float32,
                initializer=tf.contrib.layers.xavier_initializer())
            b_o = tf.get_variable("output_b", [self.num_labels],
                                  tf.float32,
                                  initializer=tf.constant_initializer(0.0))

        # Compute the logits (each char probability for each timestep of the input, for each item of the batch)
        logits = tf.stack([
            tf.matmul(tf.squeeze(i, axis=[0]), w_o) + b_o
            for i in tf.split(axis=0,
                              num_or_size_splits=self.max_input_seq_length,
                              value=rnn_output)
        ])

        # Compute the prediction which is the best "path" of probabilities for each item of the batch
        decoded, _log_prob = tf.nn.ctc_beam_search_decoder(
            logits, input_seq_lengths)
        # Set the RNN result to the best path found
        prediction = tf.to_int32(decoded[0])

        return global_step, logits, prediction, rnn_keep_state_op, rnn_state_zero_op, \
            input_keep_prob_ph, output_keep_prob_ph, rnn_tuple_state
Example #32
def learn(make_env, make_policy, *,
          n_episodes,
          horizon,
          delta,
          gamma,
          max_iters,
          sampler=None,
          use_natural_gradient=False, #can be 'exact', 'approximate'
          fisher_reg=1e-2,
          iw_method='is',
          iw_norm='none',
          bound='J',
          line_search_type='parabola',
          save_weights=False,
          improvement_tol=0.,
          center_return=False,
          render_after=None,
          max_offline_iters=100,
          callback=None,
          clipping=False,
          entropy='none',
          positive_return=False,
          reward_clustering='none'):

    np.set_printoptions(precision=3)
    max_samples = horizon * n_episodes

    if line_search_type == 'binary':
        line_search = line_search_binary
    elif line_search_type == 'parabola':
        line_search = line_search_parabola
    else:
        raise ValueError()

    # Building the environment
    env = make_env()
    ob_space = env.observation_space
    ac_space = env.action_space

    # Building the policy
    pi = make_policy('pi', ob_space, ac_space)
    oldpi = make_policy('oldpi', ob_space, ac_space)

    all_var_list = pi.get_trainable_variables()
    var_list = [v for v in all_var_list if v.name.split('/')[1].startswith('pol')]

    shapes = [U.intprod(var.get_shape().as_list()) for var in var_list]
    n_parameters = sum(shapes)

    # Placeholders
    ob_ = ob = U.get_placeholder_cached(name='ob')
    ac_ = pi.pdtype.sample_placeholder([max_samples], name='ac')
    mask_ = tf.placeholder(dtype=tf.float32, shape=(max_samples), name='mask')
    rew_ = tf.placeholder(dtype=tf.float32, shape=(max_samples), name='rew')
    disc_rew_ = tf.placeholder(dtype=tf.float32, shape=(max_samples), name='disc_rew')
    clustered_rew_ = tf.placeholder(dtype=tf.float32, shape=(n_episodes))
    gradient_ = tf.placeholder(dtype=tf.float32, shape=(n_parameters, 1), name='gradient')
    iter_number_ = tf.placeholder(dtype=tf.int32, name='iter_number')
    losses_with_name = []

    # Policy densities
    target_log_pdf = pi.pd.logp(ac_)
    behavioral_log_pdf = oldpi.pd.logp(ac_)
    log_ratio = target_log_pdf - behavioral_log_pdf

    # Split operations
    disc_rew_split = tf.stack(tf.split(disc_rew_ * mask_, n_episodes))
    rew_split = tf.stack(tf.split(rew_ * mask_, n_episodes))
    log_ratio_split = tf.stack(tf.split(log_ratio * mask_, n_episodes))
    target_log_pdf_split = tf.stack(tf.split(target_log_pdf * mask_, n_episodes))
    behavioral_log_pdf_split = tf.stack(tf.split(behavioral_log_pdf * mask_, n_episodes))
    mask_split = tf.stack(tf.split(mask_, n_episodes))

    # Renyi divergence
    emp_d2_split = tf.stack(tf.split(pi.pd.renyi(oldpi.pd, 2) * mask_, n_episodes))
    emp_d2_cum_split = tf.reduce_sum(emp_d2_split, axis=1)
    empirical_d2 = tf.reduce_mean(tf.exp(emp_d2_cum_split))

    # Return
    ep_return = clustered_rew_ #tf.reduce_sum(mask_split * disc_rew_split, axis=1)
    if clipping:
        rew_split = tf.clip_by_value(rew_split, -1, 1)

    if center_return:
        ep_return = ep_return - tf.reduce_mean(ep_return)
        rew_split = rew_split - (tf.reduce_sum(rew_split) / (tf.reduce_sum(mask_split) + 1e-24))

    discounter = [pow(gamma, i) for i in range(0, horizon)] # Decreasing gamma
    discounter_tf = tf.constant(discounter)
    disc_rew_split = rew_split * discounter_tf

    #tf.add_to_collection('prints', tf.Print(ep_return, [ep_return], 'ep_return_not_clustered', summarize=20))

    # Reward clustering
    '''
    rew_clustering_options = reward_clustering.split(':')
    if reward_clustering == 'none':
        pass # Do nothing
    elif rew_clustering_options[0] == 'global':
        assert len(rew_clustering_options) == 2, "Reward clustering: Provide the correct number of parameters"
        N = int(rew_clustering_options[1])
        tf.add_to_collection('prints', tf.Print(ep_return, [ep_return], 'ep_return', summarize=20))
        global_rew_min = tf.Variable(float('+inf'), trainable=False)
        global_rew_max = tf.Variable(float('-inf'), trainable=False)
        rew_min = tf.reduce_min(ep_return)
        rew_max = tf.reduce_max(ep_return)
        global_rew_min = tf.assign(global_rew_min, tf.minimum(global_rew_min, rew_min))
        global_rew_max = tf.assign(global_rew_max, tf.maximum(global_rew_max, rew_max))
        interval_size = (global_rew_max - global_rew_min) / N
        ep_return = tf.floordiv(ep_return, interval_size) * interval_size
    elif rew_clustering_options[0] == 'batch':
        assert len(rew_clustering_options) == 2, "Reward clustering: Provide the correct number of parameters"
        N = int(rew_clustering_options[1])
        rew_min = tf.reduce_min(ep_return)
        rew_max = tf.reduce_max(ep_return)
        interval_size = (rew_max - rew_min) / N
        ep_return = tf.floordiv(ep_return, interval_size) * interval_size
    elif rew_clustering_options[0] == 'manual':
        assert len(rew_clustering_options) == 4, "Reward clustering: Provide the correct number of parameters"
        N, rew_min, rew_max = map(int, rew_clustering_options[1:])
        print("N:", N)
        print("Min reward:", rew_min)
        print("Max reward:", rew_max)
        interval_size = (rew_max - rew_min) / N
        print("Interval size:", interval_size)
        # Clip to avoid overflow and cluster
        ep_return = tf.clip_by_value(ep_return, rew_min, rew_max)
        ep_return = tf.cast(tf.floordiv(ep_return, interval_size) * interval_size, tf.float32)
        tf.add_to_collection('prints', tf.Print(ep_return, [ep_return], 'ep_return_clustered', summarize=20))
    else:
        raise Exception('Unrecognized reward clustering scheme.')
    '''

    return_mean = tf.reduce_mean(ep_return)
    return_std = U.reduce_std(ep_return)
    return_max = tf.reduce_max(ep_return)
    return_min = tf.reduce_min(ep_return)
    return_abs_max = tf.reduce_max(tf.abs(ep_return))
    return_step_max = tf.reduce_max(tf.abs(rew_split)) # Max step reward
    return_step_mean = tf.abs(tf.reduce_mean(rew_split))
    positive_step_return_max = tf.maximum(0.0, tf.reduce_max(rew_split))
    negative_step_return_max = tf.maximum(0.0, tf.reduce_max(-rew_split))
    return_step_maxmin = tf.abs(positive_step_return_max - negative_step_return_max)

    losses_with_name.extend([(return_mean, 'InitialReturnMean'),
                             (return_max, 'InitialReturnMax'),
                             (return_min, 'InitialReturnMin'),
                             (return_std, 'InitialReturnStd'),
                             (empirical_d2, 'EmpiricalD2'),
                             (return_step_max, 'ReturnStepMax'),
                             (return_step_maxmin, 'ReturnStepMaxmin')])

    if iw_method == 'pdis':
        # log_ratio_split cumulative sum
        log_ratio_cumsum = tf.cumsum(log_ratio_split, axis=1)
        # Exponentiate
        ratio_cumsum = tf.exp(log_ratio_cumsum)
        # Multiply by the step-wise reward (not episode)
        ratio_reward = ratio_cumsum * disc_rew_split
        # Average on episodes
        ratio_reward_per_episode = tf.reduce_sum(ratio_reward, axis=1)
        w_return_mean = tf.reduce_sum(ratio_reward_per_episode, axis=0) / n_episodes
        # Get d2(w0:t) with mask
        d2_w_0t = tf.exp(tf.cumsum(emp_d2_split, axis=1)) * mask_split # LEAVE THIS OUTSIDE
        # Sum d2(w0:t) over timesteps
        episode_d2_0t = tf.reduce_sum(d2_w_0t, axis=1)
        # Sample variance
        J_sample_variance = (1/(n_episodes-1)) * tf.reduce_sum(tf.square(ratio_reward_per_episode - w_return_mean))
        losses_with_name.append((J_sample_variance, 'J_sample_variance'))
        losses_with_name.extend([(tf.reduce_max(ratio_cumsum), 'MaxIW'),
                                 (tf.reduce_min(ratio_cumsum), 'MinIW'),
                                 (tf.reduce_mean(ratio_cumsum), 'MeanIW'),
                                 (U.reduce_std(ratio_cumsum), 'StdIW')])
        losses_with_name.extend([(tf.reduce_max(d2_w_0t), 'MaxD2w0t'),
                                 (tf.reduce_min(d2_w_0t), 'MinD2w0t'),
                                 (tf.reduce_mean(d2_w_0t), 'MeanD2w0t'),
                                 (U.reduce_std(d2_w_0t), 'StdD2w0t')])

    elif iw_method == 'is':
        iw = tf.exp(tf.reduce_sum(log_ratio_split, axis=1))
        if iw_norm == 'none':
            iwn = iw / n_episodes
            w_return_mean = tf.reduce_sum(iwn * ep_return)
            J_sample_variance = (1/(n_episodes-1)) * tf.reduce_sum(tf.square(iw * ep_return - w_return_mean))
            losses_with_name.append((J_sample_variance, 'J_sample_variance'))
        elif iw_norm == 'sn':
            iwn = iw / tf.reduce_sum(iw)
            w_return_mean = tf.reduce_sum(iwn * ep_return)
        elif iw_norm == 'regression':
            iwn = iw / n_episodes
            mean_iw = tf.reduce_mean(iw)
            beta = tf.reduce_sum((iw - mean_iw) * ep_return * iw) / (tf.reduce_sum((iw - mean_iw) ** 2) + 1e-24)
            w_return_mean = tf.reduce_mean(iw * ep_return - beta * (iw - 1))
        else:
            raise NotImplementedError()
        ess_classic = tf.linalg.norm(iw, 1) ** 2 / tf.linalg.norm(iw, 2) ** 2
        sqrt_ess_classic = tf.linalg.norm(iw, 1) / tf.linalg.norm(iw, 2)
        ess_renyi = n_episodes / empirical_d2
        losses_with_name.extend([(tf.reduce_max(iwn), 'MaxIWNorm'),
                                 (tf.reduce_min(iwn), 'MinIWNorm'),
                                 (tf.reduce_mean(iwn), 'MeanIWNorm'),
                                 (U.reduce_std(iwn), 'StdIWNorm'),
                                 (tf.reduce_max(iw), 'MaxIW'),
                                 (tf.reduce_min(iw), 'MinIW'),
                                 (tf.reduce_mean(iw), 'MeanIW'),
                                 (U.reduce_std(iw), 'StdIW'),
                                 (ess_classic, 'ESSClassic'),
                                 (ess_renyi, 'ESSRenyi')])
    elif iw_method == 'rbis':
        # Get pdfs for episodes
        target_log_pdf_episode = tf.reduce_sum(target_log_pdf_split, axis=1)
        behavioral_log_pdf_episode = tf.reduce_sum(behavioral_log_pdf_split, axis=1)
        # Normalize log_proba (avoid as overflows as possible)
        normalization_factor = tf.reduce_mean(tf.stack([target_log_pdf_episode, behavioral_log_pdf_episode]))
        target_norm_log_pdf_episode = target_log_pdf_episode - normalization_factor
        behavioral_norm_log_pdf_episode = behavioral_log_pdf_episode - normalization_factor
        # Exponentiate
        target_pdf_episode = tf.clip_by_value(tf.cast(tf.exp(target_norm_log_pdf_episode), tf.float64), 1e-300, 1e+300)
        behavioral_pdf_episode = tf.clip_by_value(tf.cast(tf.exp(behavioral_norm_log_pdf_episode), tf.float64), 1e-300, 1e+300)
        tf.add_to_collection('asserts', tf.assert_positive(target_pdf_episode, name='target_pdf_positive'))
        tf.add_to_collection('asserts', tf.assert_positive(behavioral_pdf_episode, name='behavioral_pdf_positive'))
        # Compute the merging matrix (reward-clustering) and the number of clusters
        reward_unique, reward_indexes = tf.unique(ep_return)
        episode_clustering_matrix = tf.cast(tf.one_hot(reward_indexes, n_episodes), tf.float64)
        max_index = tf.reduce_max(reward_indexes) + 1
        trajectories_per_cluster = tf.reduce_sum(episode_clustering_matrix, axis=0)[:max_index]
        tf.add_to_collection('asserts', tf.assert_positive(tf.reduce_sum(episode_clustering_matrix, axis=0)[:max_index], name='clustering_matrix'))
        # Get the clustered pdfs
        clustered_target_pdf = tf.matmul(tf.reshape(target_pdf_episode, (1, -1)), episode_clustering_matrix)[0][:max_index]
        clustered_behavioral_pdf = tf.matmul(tf.reshape(behavioral_pdf_episode, (1, -1)), episode_clustering_matrix)[0][:max_index]
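        # episode_clustering_matrix is a one-hot (n_episodes x n_episodes) map from
        # episodes to unique-return clusters (columns beyond max_index are all zero),
        # so these matmuls sum the per-episode pdfs within each cluster.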
        tf.add_to_collection('asserts', tf.assert_positive(clustered_target_pdf, name='clust_target_pdf_positive'))
        tf.add_to_collection('asserts', tf.assert_positive(clustered_behavioral_pdf, name='clust_behavioral_pdf_positive'))
        # Compute the J
        ratio_clustered = clustered_target_pdf / clustered_behavioral_pdf
        #ratio_reward = tf.cast(ratio_clustered, tf.float32) * reward_unique                                                  # ---- No cluster cardinality
        ratio_reward = tf.cast(ratio_clustered, tf.float32) * reward_unique * tf.cast(trajectories_per_cluster, tf.float32)   # ---- Cluster cardinality
        #w_return_mean = tf.reduce_sum(ratio_reward) / tf.cast(max_index, tf.float32)                                         # ---- No cluster cardinality
        w_return_mean = tf.reduce_sum(ratio_reward) / tf.cast(n_episodes, tf.float32)                                         # ---- Cluster cardinality
        # Divergences
        ess_classic = tf.linalg.norm(ratio_reward, 1) ** 2 / tf.linalg.norm(ratio_reward, 2) ** 2
        sqrt_ess_classic = tf.linalg.norm(ratio_reward, 1) / tf.linalg.norm(ratio_reward, 2)
        ess_renyi = n_episodes / empirical_d2
        # Summaries
        losses_with_name.extend([(tf.reduce_max(ratio_clustered), 'MaxIW'),
                                 (tf.reduce_min(ratio_clustered), 'MinIW'),
                                 (tf.reduce_mean(ratio_clustered), 'MeanIW'),
                                 (U.reduce_std(ratio_clustered), 'StdIW'),
                                 (1-(max_index / n_episodes), 'RewardCompression'),
                                 (ess_classic, 'ESSClassic'),
                                 (ess_renyi, 'ESSRenyi')])
    else:
        raise NotImplementedError()

    if bound == 'J':
        bound_ = w_return_mean
    elif bound == 'std-d2':
        bound_ = w_return_mean - tf.sqrt((1 - delta) / (delta * ess_renyi)) * return_std
    elif bound == 'max-d2':
        var_estimate = tf.sqrt((1 - delta) / (delta * ess_renyi)) * return_abs_max
        bound_ = w_return_mean - var_estimate
    elif bound == 'max-ess':
        bound_ = w_return_mean - tf.sqrt((1 - delta) / delta) / sqrt_ess_classic * return_abs_max
    elif bound == 'std-ess':
        bound_ = w_return_mean - tf.sqrt((1 - delta) / delta) / sqrt_ess_classic * return_std
    elif bound == 'pdis-max-d2':
        # Discount factor
        if gamma >= 1:
            discounter = [float(1+2*(horizon-t-1)) for t in range(0, horizon)]
        else:
            def f(t):
                return pow(gamma, 2*t) + (2*pow(gamma,t)*(pow(gamma, t+1) - pow(gamma, horizon))) / (1-gamma)
            discounter = [f(t) for t in range(0, horizon)]
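        # For gamma < 1, f(t) = gamma^(2t) + 2*gamma^t*(gamma^(t+1) - gamma^H)/(1 - gamma)
        # = gamma^(2t) + 2 * sum_{s=t+1}^{H-1} gamma^(t+s); the gamma >= 1 branch above
        # is its limit, 1 + 2*(H - t - 1).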
        discounter_tf = tf.constant(discounter)
        mean_episode_d2 = tf.reduce_sum(d2_w_0t, axis=0) / (tf.reduce_sum(mask_split, axis=0) + 1e-24)
        discounted_d2 = mean_episode_d2 * discounter_tf # Discounted d2
        discounted_total_d2 = tf.reduce_sum(discounted_d2, axis=0) # Sum over time
        bound_ = w_return_mean - tf.sqrt((1-delta) * discounted_total_d2 / (delta*n_episodes)) * return_step_max
    elif bound == 'pdis-mean-d2':
        # Discount factor
        if gamma >= 1:
            discounter = [float(1+2*(horizon-t-1)) for t in range(0, horizon)]
        else:
            def f(t):
                return pow(gamma, 2*t) + (2*pow(gamma,t)*(pow(gamma, t+1) - pow(gamma, horizon))) / (1-gamma)
            discounter = [f(t) for t in range(0, horizon)]
        discounter_tf = tf.constant(discounter)
        mean_episode_d2 = tf.reduce_sum(d2_w_0t, axis=0) / (tf.reduce_sum(mask_split, axis=0) + 1e-24)
        discounted_d2 = mean_episode_d2 * discounter_tf # Discounted d2
        discounted_total_d2 = tf.reduce_sum(discounted_d2, axis=0) # Sum over time
        bound_ = w_return_mean - tf.sqrt((1-delta) * discounted_total_d2 / (delta*n_episodes)) * return_step_mean
    else:
        raise NotImplementedError()

    # Policy entropy for exploration
    ent = pi.pd.entropy()
    meanent = tf.reduce_mean(ent)
    losses_with_name.append((meanent, 'MeanEntropy'))
    # Add policy entropy bonus
    if entropy != 'none':
        scheme, v1, v2 = entropy.split(':')
        if scheme == 'step':
            entcoeff = tf.cond(iter_number_ < int(v2), lambda: float(v1), lambda: float(0.0))
            losses_with_name.append((entcoeff, 'EntropyCoefficient'))
            entbonus = entcoeff * meanent
            bound_ = bound_ + entbonus
        elif scheme == 'lin':
            ip = tf.cast(iter_number_ / max_iters, tf.float32)
            entcoeff_decay = tf.maximum(0.0, float(v2) + (float(v1) - float(v2)) * (1.0 - ip))
            losses_with_name.append((entcoeff_decay, 'EntropyCoefficient'))
            entbonus = entcoeff_decay * meanent
            bound_ = bound_ + entbonus
        elif scheme == 'exp':
            ent_f = tf.exp(-tf.abs(tf.reduce_mean(iw) - 1) * float(v2)) * float(v1)
            losses_with_name.append((ent_f, 'EntropyCoefficient'))
            bound_ = bound_ + ent_f * meanent
        else:
            raise Exception('Unrecognized entropy scheme.')

    losses_with_name.append((w_return_mean, 'ReturnMeanIW'))
    losses_with_name.append((bound_, 'Bound'))
    losses, loss_names = map(list, zip(*losses_with_name))

    if use_natural_gradient:
        p = tf.placeholder(dtype=tf.float32, shape=[None])
        target_logpdf_episode = tf.reduce_sum(target_log_pdf_split * mask_split, axis=1)
        grad_logprob = U.flatgrad(tf.stop_gradient(iwn) * target_logpdf_episode, var_list)
        dot_product = tf.reduce_sum(grad_logprob * p)
        hess_logprob = U.flatgrad(dot_product, var_list)
        compute_linear_operator = U.function([p, ob_, ac_, disc_rew_, mask_], [-hess_logprob])

    assign_old_eq_new = U.function([], [], updates=[tf.assign(oldv, newv)
                for (oldv, newv) in zipsame(oldpi.get_variables(), pi.get_variables())])

    assert_ops = tf.group(*tf.get_collection('asserts'))
    print_ops = tf.group(*tf.get_collection('prints'))

    compute_lossandgrad = U.function([ob_, ac_, rew_, disc_rew_, clustered_rew_, mask_, iter_number_], losses + [U.flatgrad(bound_, var_list), assert_ops, print_ops])
    compute_grad = U.function([ob_, ac_, rew_, disc_rew_, clustered_rew_, mask_, iter_number_], [U.flatgrad(bound_, var_list), assert_ops, print_ops])
    compute_bound = U.function([ob_, ac_, rew_, disc_rew_, clustered_rew_, mask_, iter_number_], [bound_, assert_ops, print_ops])
    compute_losses = U.function([ob_, ac_, rew_, disc_rew_, clustered_rew_, mask_, iter_number_], losses)
    #compute_temp = U.function([ob_, ac_, rew_, disc_rew_, mask_], [ratio_cumsum, discounted_ratio])

    set_parameter = U.SetFromFlat(var_list)
    get_parameter = U.GetFlat(var_list)

    if sampler is None:
        seg_gen = traj_segment_generator(pi, env, n_episodes, horizon, stochastic=True)
        sampler = type("SequentialSampler", (object,), {"collect": lambda self, _: seg_gen.__next__()})()

    U.initialize()

    # Start optimizing

    episodes_so_far = 0
    timesteps_so_far = 0
    iters_so_far = 0
    tstart = time.time()
    lenbuffer = deque(maxlen=n_episodes)
    rewbuffer = deque(maxlen=n_episodes)

    while True:

        iters_so_far += 1

        if render_after is not None and iters_so_far % render_after == 0:
            if hasattr(env, 'render'):
                render(env, pi, horizon)

        if callback:
            callback(locals(), globals())

        if iters_so_far >= max_iters:
            print('Finished...')
            break

        logger.log('********** Iteration %i ************' % iters_so_far)

        theta = get_parameter()

        with timed('sampling'):
            seg = sampler.collect(theta)

        add_disc_rew(seg, gamma)

        lens, rets = seg['ep_lens'], seg['ep_rets']
        lenbuffer.extend(lens)
        rewbuffer.extend(rets)
        episodes_so_far += len(lens)
        timesteps_so_far += sum(lens)

        # Get clustered reward
        reward_matrix = np.reshape(seg['disc_rew'] * seg['mask'], (n_episodes, horizon))
        ep_reward = np.sum(reward_matrix, axis=1)
        if reward_clustering == 'none':
            pass
        elif reward_clustering == 'floor':
            ep_reward = np.floor(ep_reward)
        elif reward_clustering == 'ceil':
            ep_reward = np.ceil(ep_reward)
        elif reward_clustering == 'floor10':
            ep_reward = np.floor(ep_reward * 10) / 10
        elif reward_clustering == 'ceil10':
            ep_reward = np.ceil(ep_reward * 10) / 10
        elif reward_clustering == 'floor100':
            ep_reward = np.floor(ep_reward * 100) / 100
        elif reward_clustering == 'ceil100':
            ep_reward = np.ceil(ep_reward * 100) / 100
        else:
            raise Exception('Unrecognized reward clustering scheme.')


        args = ob, ac, rew, disc_rew, clustered_rew, mask, iter_number = seg['ob'], seg['ac'], seg['rew'], seg['disc_rew'], ep_reward, seg['mask'], iters_so_far

        assign_old_eq_new()

        def evaluate_loss():
            loss = compute_bound(*args)
            return loss[0]

        def evaluate_gradient():
            gradient = compute_grad(*args)
            return gradient[0]

        if use_natural_gradient:
            def evaluate_fisher_vector_prod(x):
                return compute_linear_operator(x, *args)[0] + fisher_reg * x

            def evaluate_natural_gradient(g):
                return cg(evaluate_fisher_vector_prod, g, cg_iters=10, verbose=0)
        else:
            evaluate_natural_gradient = None

        with timed('summaries before'):
            logger.record_tabular("Iteration", iters_so_far)
            logger.record_tabular("InitialBound", evaluate_loss())
            logger.record_tabular("EpLenMean", np.mean(lenbuffer))
            logger.record_tabular("EpRewMean", np.mean(rewbuffer))
            logger.record_tabular("EpThisIter", len(lens))
            logger.record_tabular("EpisodesSoFar", episodes_so_far)
            logger.record_tabular("TimestepsSoFar", timesteps_so_far)
            logger.record_tabular("TimeElapsed", time.time() - tstart)

        if save_weights:
            logger.record_tabular('Weights', str(get_parameter()))
            import pickle
            with open('checkpoint.pkl', 'wb') as checkpoint_file:
                pickle.dump(theta, checkpoint_file)

        with timed("offline optimization"):
            theta, improvement = optimize_offline(theta,
                                                  set_parameter,
                                                  line_search,
                                                  evaluate_loss,
                                                  evaluate_gradient,
                                                  evaluate_natural_gradient,
                                                  max_offline_ite=max_offline_iters)

        set_parameter(theta)

        with timed('summaries after'):
            meanlosses = np.array(compute_losses(*args))
            for (lossname, lossval) in zip(loss_names, meanlosses):
                logger.record_tabular(lossname, lossval)

        logger.dump_tabular()

    env.close()
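
A minimal, self-contained NumPy sketch of the self-normalized importance-sampling estimate used in the `iw_norm == 'sn'` branch above (the function and variable names here are illustrative, not taken from the code):

import numpy as np

def snis_return_estimate(target_logp, behavioral_logp, ep_return):
    # Per-episode log importance weights; subtracting the max keeps exp() stable
    # and cancels out after self-normalization.
    log_iw = np.asarray(target_logp) - np.asarray(behavioral_logp)
    iw = np.exp(log_iw - np.max(log_iw))
    iwn = iw / np.sum(iw)                   # weights sum to 1 across episodes
    return float(np.sum(iwn * np.asarray(ep_return)))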
Example #33
 def transpose_coordinates(self):
   """Transpose the coordinate representation in a boxlist."""
   with tf.name_scope('transpose_coordinates'):
     y_min, x_min, y_max, x_max = tf.split(
         value=self.get(), num_or_size_splits=4, axis=1)
     self.set(tf.concat([x_min, y_min, x_max, y_max], 1))
Example #34
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--filelist',
                        '-t',
                        help='Path to training set ground truth (.txt)',
                        required=True)
    parser.add_argument('--filelist_val',
                        '-v',
                        help='Path to validation set ground truth (.txt)',
                        required=True)
    parser.add_argument('--load_ckpt',
                        '-l',
                        help='Path to a check point file for load')
    parser.add_argument(
        '--save_folder',
        '-s',
        help='Path to folder for saving check points and summary',
        required=True)
    parser.add_argument('--model', '-m', help='Model to use', required=True)
    parser.add_argument('--setting',
                        '-x',
                        help='Setting to use',
                        required=True)
    args = parser.parse_args()

    time_string = datetime.now().strftime('%Y-%m-%d-%H-%M-%S')
    root_folder = os.path.join(
        args.save_folder,
        '%s_%s_%d_%s' % (args.model, args.setting, os.getpid(), time_string))
    if not os.path.exists(root_folder):
        os.makedirs(root_folder)

    sys.stdout = open(os.path.join(root_folder, 'log.txt'), 'w')

    print('PID:', os.getpid())

    print(args)

    model = importlib.import_module(args.model)
    setting_path = os.path.join(os.path.dirname(__file__), args.model)
    sys.path.append(setting_path)
    setting = importlib.import_module(args.setting)

    num_epochs = setting.num_epochs
    batch_size = setting.batch_size
    sample_num = setting.sample_num
    step_val = 500
    num_parts = setting.num_parts
    label_weights_list = setting.label_weights
    scaling_range = setting.scaling_range
    scaling_range_val = setting.scaling_range_val
    jitter = setting.jitter
    jitter_val = setting.jitter_val

    # Prepare inputs
    print('{}-Preparing datasets...'.format(datetime.now()))
    data_train, _, data_num_train, label_train = data_utils.load_seg(
        args.filelist)
    data_val, _, data_num_val, label_val = data_utils.load_seg(
        args.filelist_val)

    # shuffle
    data_train, data_num_train, label_train = \
        data_utils.grouped_shuffle([data_train, data_num_train, label_train])

    num_train = data_train.shape[0]
    point_num = data_train.shape[1]
    num_val = data_val.shape[0]
    print('{}-{:d}/{:d} training/validation samples.'.format(
        datetime.now(), num_train, num_val))
    batch_num = (num_train * num_epochs + batch_size - 1) // batch_size
    print('{}-{:d} training batches.'.format(datetime.now(), batch_num))

    ######################################################################
    # Placeholders
    indices = tf.placeholder(tf.int32, shape=(None, None, 2), name="indices")
    xforms = tf.placeholder(tf.float32, shape=(None, 3, 3), name="xforms")
    rotations = tf.placeholder(tf.float32,
                               shape=(None, 3, 3),
                               name="rotations")
    jitter_range = tf.placeholder(tf.float32, shape=(1), name="jitter_range")
    global_step = tf.Variable(0, trainable=False, name='global_step')
    is_training = tf.placeholder(tf.bool, name='is_training')

    pts_fts = tf.placeholder(tf.float32,
                             shape=(None, point_num, setting.data_dim),
                             name='pts_fts')
    labels_seg = tf.placeholder(tf.int32,
                                shape=(None, point_num),
                                name='labels_seg')
    labels_weights = tf.placeholder(tf.float32,
                                    shape=(None, point_num),
                                    name='labels_weights')

    ######################################################################
    features_augmented = None
    if setting.data_dim > 3:
        points, features = tf.split(pts_fts, [3, setting.data_dim - 3],
                                    axis=-1,
                                    name='split_points_features')
        if setting.use_extra_features:
            features_sampled = tf.gather_nd(features,
                                            indices=indices,
                                            name='features_sampled')
            if setting.with_normal_feature:
                if setting.data_dim < 6:
                    print('Only 3D normals are supported!')
                    exit()
                elif setting.data_dim == 6:
                    features_augmented = pf.augment(features_sampled,
                                                    rotations)
                else:
                    normals, rest = tf.split(features_sampled,
                                             [3, setting.data_dim - 6],
                                             axis=-1)
                    normals_augmented = pf.augment(normals, rotations)
                    features_augmented = tf.concat([normals_augmented, rest],
                                                   axis=-1)
            else:
                features_augmented = features_sampled
    else:
        points = pts_fts

    points_sampled = tf.gather_nd(points,
                                  indices=indices,
                                  name='points_sampled')
    points_augmented = pf.augment(points_sampled, xforms, jitter_range)
    labels_sampled = tf.gather_nd(labels_seg,
                                  indices=indices,
                                  name='labels_sampled')
    labels_weights_sampled = tf.gather_nd(labels_weights,
                                          indices=indices,
                                          name='labels_weight_sampled')

    net = model.Net(points_augmented, features_augmented, num_parts,
                    is_training, setting)
    logits, probs = net.logits, net.probs

    loss_op = tf.losses.sparse_softmax_cross_entropy(
        labels=labels_sampled, logits=logits, weights=labels_weights_sampled)

    t_1_acc_op = pf.top_1_accuracy(probs, labels_sampled)
    _ = tf.summary.scalar('loss/train_seg',
                          tensor=loss_op,
                          collections=['train'])
    _ = tf.summary.scalar('t_1_acc/train_seg',
                          tensor=t_1_acc_op,
                          collections=['train'])

    loss_val_avg = tf.placeholder(tf.float32)
    t_1_acc_val_avg = tf.placeholder(tf.float32)
    _ = tf.summary.scalar('loss/val_seg',
                          tensor=loss_val_avg,
                          collections=['val'])
    _ = tf.summary.scalar('t_1_acc/val_seg',
                          tensor=t_1_acc_val_avg,
                          collections=['val'])

    lr_exp_op = tf.train.exponential_decay(setting.learning_rate_base,
                                           global_step,
                                           setting.decay_steps,
                                           setting.decay_rate,
                                           staircase=True)
    lr_clip_op = tf.maximum(lr_exp_op, setting.learning_rate_min)
    _ = tf.summary.scalar('learning_rate',
                          tensor=lr_clip_op,
                          collections=['train'])
    reg_loss = setting.weight_decay * tf.losses.get_regularization_loss()
    if setting.optimizer == 'adam':
        optimizer = tf.train.AdamOptimizer(learning_rate=lr_clip_op,
                                           epsilon=setting.epsilon)
    elif setting.optimizer == 'momentum':
        optimizer = tf.train.MomentumOptimizer(learning_rate=lr_clip_op,
                                               momentum=0.9,
                                               use_nesterov=True)
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(update_ops):
        train_op = optimizer.minimize(loss_op + reg_loss,
                                      global_step=global_step)

    init_op = tf.group(tf.global_variables_initializer(),
                       tf.local_variables_initializer())

    saver = tf.train.Saver(max_to_keep=None)

    # backup this file, model and setting
    shutil.copy(__file__, os.path.join(root_folder,
                                       os.path.basename(__file__)))
    shutil.copy(os.path.join(os.path.dirname(__file__), args.model + '.py'),
                os.path.join(root_folder, args.model + '.py'))
    if not os.path.exists(os.path.join(root_folder, args.model)):
        os.makedirs(os.path.join(root_folder, args.model))
    shutil.copy(os.path.join(setting_path, args.setting + '.py'),
                os.path.join(root_folder, args.model, args.setting + '.py'))

    folder_ckpt = os.path.join(root_folder, 'ckpts')
    if not os.path.exists(folder_ckpt):
        os.makedirs(folder_ckpt)

    folder_summary = os.path.join(root_folder, 'summary')
    if not os.path.exists(folder_summary):
        os.makedirs(folder_summary)

    parameter_num = np.sum(
        [np.prod(v.shape.as_list()) for v in tf.trainable_variables()])
    print('{}-Parameter number: {:d}.'.format(datetime.now(), parameter_num))

    with tf.Session() as sess:
        summaries_op = tf.summary.merge_all('train')
        summaries_val_op = tf.summary.merge_all('val')
        summary_writer = tf.summary.FileWriter(folder_summary, sess.graph)

        sess.run(init_op)

        # Load the model
        if args.load_ckpt is not None:
            saver.restore(sess, args.load_ckpt)
            print('{}-Checkpoint loaded from {}!'.format(
                datetime.now(), args.load_ckpt))

        for batch_idx in range(batch_num):
            if (batch_idx != 0 and batch_idx % step_val
                    == 0) or batch_idx == batch_num - 1:
                ######################################################################
                # Validation
                filename_ckpt = os.path.join(folder_ckpt, 'iter')
                saver.save(sess, filename_ckpt, global_step=global_step)
                print('{}-Checkpoint saved to {}!'.format(
                    datetime.now(), filename_ckpt))

                losses = []
                t_1_accs = []
                for batch_val_idx in range(math.ceil(num_val / batch_size)):
                    start_idx = batch_size * batch_val_idx
                    end_idx = min(start_idx + batch_size, num_val)
                    batch_size_val = end_idx - start_idx
                    points_batch = data_val[start_idx:end_idx, ...]
                    points_num_batch = data_num_val[start_idx:end_idx, ...]
                    labels_batch = label_val[start_idx:end_idx, ...]
                    weights_batch = np.array(label_weights_list)[label_val[
                        start_idx:end_idx, ...]]

                    xforms_np, rotations_np = pf.get_xforms(
                        batch_size_val, scaling_range=scaling_range_val)
                    _, loss_val, t_1_acc_val = \
                        sess.run([update_ops, loss_op, t_1_acc_op],
                                 feed_dict={
                                     pts_fts: points_batch,
                                     indices: pf.get_indices(batch_size_val, sample_num, points_num_batch, False),
                                     xforms: xforms_np,
                                     rotations: rotations_np,
                                     jitter_range: np.array([jitter_val]),
                                     labels_seg: labels_batch,
                                     labels_weights: weights_batch,
                                     is_training: False,
                                 })
                    losses.append(loss_val * batch_size_val)
                    t_1_accs.append(t_1_acc_val * batch_size_val)
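                    # Weight each batch by its size so the averages computed after
                    # the loop are exact per-sample means even when the final batch
                    # is smaller than batch_size.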
                    print(
                        '{}-[Val  ]-Iter: {:06d}  Loss: {:.4f}  T-1 Acc: {:.4f}'
                        .format(datetime.now(), batch_val_idx, loss_val,
                                t_1_acc_val))
                    sys.stdout.flush()

                loss_avg = sum(losses) / num_val
                t_1_acc_avg = sum(t_1_accs) / num_val
                summaries_val = sess.run(summaries_val_op,
                                         feed_dict={
                                             loss_val_avg: loss_avg,
                                             t_1_acc_val_avg: t_1_acc_avg,
                                         })
                summary_writer.add_summary(summaries_val, batch_idx)
                print('{}-[Val  ]-Average:      Loss: {:.4f}  T-1 Acc: {:.4f}'.
                      format(datetime.now(), loss_avg, t_1_acc_avg))
                sys.stdout.flush()
                ######################################################################

            ######################################################################
            # Training
            start_idx = (batch_size * batch_idx) % num_train
            end_idx = min(start_idx + batch_size, num_train)
            batch_size_train = end_idx - start_idx
            points_batch = data_train[start_idx:end_idx, ...]
            points_num_batch = data_num_train[start_idx:end_idx, ...]
            labels_batch = label_train[start_idx:end_idx, ...]
            weights_batch = np.array(label_weights_list)[labels_batch]

            if start_idx + batch_size_train == num_train:
                data_train, data_num_train, label_train = \
                    data_utils.grouped_shuffle([data_train, data_num_train, label_train])

            offset = int(random.gauss(0, sample_num // 8))
            offset = max(offset, -sample_num // 4)
            offset = min(offset, sample_num // 4)
            sample_num_train = sample_num + offset
            xforms_np, rotations_np = pf.get_xforms(
                batch_size_train, scaling_range=scaling_range)
            _, loss, t_1_acc, summaries = \
                sess.run([train_op, loss_op, t_1_acc_op, summaries_op],
                         feed_dict={
                             pts_fts: points_batch,
                             indices: pf.get_indices(batch_size_train, sample_num_train, points_num_batch),
                             xforms: xforms_np,
                             rotations: rotations_np,
                             jitter_range: np.array([jitter]),
                             labels_seg: labels_batch,
                             labels_weights: weights_batch,
                             is_training: True,
                         })
            summary_writer.add_summary(summaries, batch_idx)
            print('{}-[Train]-Iter: {:06d}  Loss: {:.4f}  T-1 Acc: {:.4f}'.
                  format(datetime.now(), batch_idx, loss, t_1_acc))
            sys.stdout.flush()
            ######################################################################
        print('{}-Done!'.format(datetime.now()))
Example #35
    def __init__(self,
                 embedding_mat,
                 non_static,
                 hidden_unit,
                 sequence_length,
                 max_pool_size,
                 num_classes,
                 embedding_size,
                 filter_sizes,
                 num_filters,
                 l2_reg_lambda=0.0):

        self.input_x = tf.placeholder(tf.int32, [None, sequence_length],
                                      name='input_x')
        self.input_y = tf.placeholder(tf.float32, [None, num_classes],
                                      name='input_y')
        self.dropout_keep_prob = tf.placeholder(tf.float32,
                                                name='dropout_keep_prob')
        self.batch_size = tf.placeholder(tf.int32, [])
        self.pad = tf.placeholder(tf.float32, [None, 1, embedding_size, 1],
                                  name='pad')
        self.real_len = tf.placeholder(tf.int32, [None], name='real_len')

        l2_loss = tf.constant(0.0)

        with tf.device('/cpu:0'), tf.name_scope('embedding'):
            if not non_static:
                W = tf.constant(embedding_mat, name='W')
            else:
                W = tf.Variable(embedding_mat, name='W')
            self.embedded_chars = tf.nn.embedding_lookup(W, self.input_x)
            emb = tf.expand_dims(self.embedded_chars, -1)

        pooled_concat = []
        reduced = np.int32(np.ceil((sequence_length) * 1.0 / max_pool_size))

        for i, filter_size in enumerate(filter_sizes):
            with tf.name_scope('conv-maxpool-%s' % filter_size):
                # Zero paddings so that the convolution output has dimension batch x sequence_length x emb_size x channel
                num_prio = (filter_size - 1) // 2
                num_post = (filter_size - 1) - num_prio
                pad_prio = tf.concat([self.pad] * num_prio, 1)
                pad_post = tf.concat([self.pad] * num_post, 1)
                emb_pad = tf.concat([pad_prio, emb, pad_post], 1)

                filter_shape = [filter_size, embedding_size, 1, num_filters]
                W = tf.Variable(tf.truncated_normal(filter_shape, stddev=0.1),
                                name='W')
                b = tf.Variable(tf.constant(0.1, shape=[num_filters]),
                                name='b')
                conv = tf.nn.conv2d(emb_pad,
                                    W,
                                    strides=[1, 1, 1, 1],
                                    padding='VALID',
                                    name='conv')

                h = tf.nn.relu(tf.nn.bias_add(conv, b), name='relu')

                # Maxpooling over the outputs
                pooled = tf.nn.max_pool(h,
                                        ksize=[1, max_pool_size, 1, 1],
                                        strides=[1, max_pool_size, 1, 1],
                                        padding='SAME',
                                        name='pool')
                pooled = tf.reshape(pooled, [-1, reduced, num_filters])
                pooled_concat.append(pooled)

        pooled_concat = tf.concat(pooled_concat, 2)
        pooled_concat = tf.nn.dropout(pooled_concat, self.dropout_keep_prob)

        # lstm_cell = tf.nn.rnn_cell.LSTMCell(num_units=hidden_unit)

        # lstm_cell = tf.nn.rnn_cell.GRUCell(num_units=hidden_unit)
        lstm_cell = tf.contrib.rnn.GRUCell(num_units=hidden_unit)

        # lstm_cell = tf.nn.rnn_cell.DropoutWrapper(lstm_cell, output_keep_prob=self.dropout_keep_prob)
        lstm_cell = tf.contrib.rnn.DropoutWrapper(
            lstm_cell, output_keep_prob=self.dropout_keep_prob)

        self._initial_state = lstm_cell.zero_state(self.batch_size, tf.float32)
        # inputs = [tf.squeeze(input_, [1]) for input_ in tf.split(1, reduced, pooled_concat)]
        inputs = [
            tf.squeeze(input_, [1]) for input_ in tf.split(
                pooled_concat, num_or_size_splits=int(reduced), axis=1)
        ]
        # outputs, state = tf.nn.rnn(lstm_cell, inputs, initial_state=self._initial_state, sequence_length=self.real_len)
        outputs, state = tf.contrib.rnn.static_rnn(
            lstm_cell,
            inputs,
            initial_state=self._initial_state,
            sequence_length=self.real_len)

        # Collect the output at each sequence's last real timestep into `output` (dimension = batch x hidden_unit)
        output = outputs[0]
        with tf.variable_scope('Output'):
            tf.get_variable_scope().reuse_variables()
            one = tf.ones([1, hidden_unit], tf.float32)
            for i in range(1, len(outputs)):
                ind = self.real_len < (i + 1)
                ind = tf.to_float(ind)
                ind = tf.expand_dims(ind, -1)
                mat = tf.matmul(ind, one)
                output = tf.add(tf.multiply(output, mat),
                                tf.multiply(outputs[i], 1.0 - mat))
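            # After this loop `output` holds, per sequence, the GRU state at its
            # last real timestep: once real_len < i + 1 the mask keeps the stored
            # value, otherwise it is overwritten with outputs[i].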

        with tf.name_scope('output'):
            self.W = tf.Variable(tf.truncated_normal(
                [hidden_unit, num_classes], stddev=0.1),
                                 name='W')
            b = tf.Variable(tf.constant(0.1, shape=[num_classes]), name='b')
            l2_loss += tf.nn.l2_loss(self.W)  # regularize the output-layer weights
            l2_loss += tf.nn.l2_loss(b)
            self.scores = tf.nn.xw_plus_b(output, self.W, b, name='scores')
            self.predictions = tf.argmax(self.scores, 1, name='predictions')

        with tf.name_scope('loss'):
            losses = tf.nn.softmax_cross_entropy_with_logits(
                labels=self.input_y,
                logits=self.scores)  # only named arguments accepted
            self.loss = tf.reduce_mean(losses) + l2_reg_lambda * l2_loss

        with tf.name_scope('accuracy'):
            correct_predictions = tf.equal(self.predictions,
                                           tf.argmax(self.input_y, 1))
            self.accuracy = tf.reduce_mean(tf.cast(correct_predictions,
                                                   "float"),
                                           name='accuracy')

        with tf.name_scope('num_correct'):
            correct = tf.equal(self.predictions, tf.argmax(self.input_y, 1))
            self.num_correct = tf.reduce_sum(tf.cast(correct, 'float'))
Example #36
# a specific format. Read more at: https://www.tensorflow.org/api_docs/python/tf/nn/dynamic_rnn
all_inputs = tf.concat([tf.expand_dims(t, 0) for t in train_inputs], axis=0)

# all_outputs is [seq_length, batch_size, num_nodes]
all_lstm_outputs, state = tf.nn.dynamic_rnn(drop_multi_cell,
                                            all_inputs,
                                            initial_state=tuple(initial_state),
                                            time_major=True,
                                            dtype=tf.float32)

all_lstm_outputs = tf.reshape(all_lstm_outputs,
                              [batch_size * num_unrollings, num_nodes[-1]])

all_outputs = tf.nn.xw_plus_b(all_lstm_outputs, w, b)

split_outputs = tf.split(all_outputs, num_unrollings, axis=0)

# When calculating the loss you need to be careful about the exact form, because you compute the
# loss of all the unrolled steps at the same time.
# Therefore, take the mean error of each batch and sum that over all the unrolled steps.

print('Defining training Loss')
loss = 0.0
with tf.control_dependencies(
    [tf.assign(c[li], state[li][0]) for li in range(n_layers)] +
    [tf.assign(h[li], state[li][1]) for li in range(n_layers)]):
    for ui in range(num_unrollings):
        loss += tf.reduce_mean(0.5 *
                               (split_outputs[ui] - train_outputs[ui])**2)
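# The control dependencies ensure the tf.assign ops above run whenever the loss is
# evaluated, copying the final LSTM state into the c/h variables so the recurrent
# state carries over to the next set of unrolled steps.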

print('Learning rate decay operations')
def G_synthesis_co_mod_gan(
        dlatents_in,  # Input: Disentangled latents (W) [minibatch, num_layers, dlatent_size].
        images_in,
        masks_in,
        dlatent_size=512,  # Disentangled latent (W) dimensionality.
        num_channels=3,  # Number of output color channels.
        resolution=1024,  # Output resolution.
        fmap_base=16 << 10,  # Overall multiplier for the number of feature maps.
        fmap_decay=1.0,  # log2 feature map reduction when doubling the resolution.
        fmap_min=1,  # Minimum number of feature maps in any layer.
        fmap_max=512,  # Maximum number of feature maps in any layer.
        randomize_noise=True,  # True = randomize noise inputs every time (non-deterministic), False = read noise inputs from variables.
        architecture='skip',  # Architecture: 'orig', 'skip', 'resnet'.
        nonlinearity='lrelu',  # Activation function: 'relu', 'lrelu', etc.
        dtype='float32',  # Data type to use for activations and outputs.
        resample_kernel=[
            1, 3, 3, 1
        ],  # Low-pass filter to apply when resampling activations. None = no filtering.
        fused_modconv=True,  # Implement modulated_conv2d_layer() as a single fused op?
        is_training=False,  # Network is under training? Enables and disables specific features.
        pix2pix=False,
        dropout_rate=0.5,
        cond_mod=True,
        style_mod=True,
        noise_injection=True,
        **_kwargs):  # Ignore unrecognized keyword args.

    resolution_log2 = int(np.log2(resolution))
    assert resolution == 2**resolution_log2 and resolution >= 4

    def nf(stage):
        return np.clip(int(fmap_base / (2.0**(stage * fmap_decay))), fmap_min,
                       fmap_max)
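    # nf(stage) shrinks the feature-map count as resolution grows
    # (fmap_base / 2**(stage * fmap_decay)), clipped to [fmap_min, fmap_max].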

    assert architecture in ['orig', 'skip', 'resnet']
    act = nonlinearity
    num_layers = resolution_log2 * 2 - 2
    images_out = None

    images_in.set_shape([None, num_channels, resolution, resolution])
    masks_in.set_shape([None, 1, resolution, resolution])
    images_in = tf.cast(images_in, dtype)
    masks_in = tf.cast(masks_in, dtype)

    def E_fromrgb(x, y, res):  # res = 2..resolution_log2
        with tf.variable_scope('FromRGB'):
            t = apply_bias_act(conv2d_layer(y, fmaps=nf(res - 1), kernel=1),
                               act=act)
            return t if x is None else x + t

    def E_block(x, res, E_features):  # res = 2..resolution_log2
        with tf.variable_scope('Conv0'):
            x = apply_bias_act(conv2d_layer(x, fmaps=nf(res - 1), kernel=3),
                               act=act)
        E_features[res] = x
        with tf.variable_scope('Conv1_down'):
            x = apply_bias_act(conv2d_layer(x,
                                            fmaps=nf(res - 2),
                                            kernel=3,
                                            down=True,
                                            resample_kernel=resample_kernel),
                               act=act)
        return x

    # Primary inputs.
    dlatents_in.set_shape([None, num_layers, dlatent_size])
    dlatents_in = tf.cast(dlatents_in, dtype)

    # Noise inputs.
    noise_inputs = []
    for layer_idx in range(num_layers - 1):
        res = (layer_idx + 5) // 2
        shape = [1, 1, 2**res, 2**res]
        noise_inputs.append(
            tf.get_variable('noise%d' % layer_idx,
                            shape=shape,
                            initializer=tf.initializers.random_normal(),
                            trainable=False))

    # Main layers.
    E_features = {}
    x = None
    if pix2pix:
        num_channels = num_channels // 2
        _, y = tf.split(images_in, 2, axis=1)
        cond = y
    else:
        y = tf.concat([masks_in - 0.5, images_in * masks_in], axis=1)
    for res in range(resolution_log2, 2, -1):
        with tf.variable_scope('E_%dx%d' % (2**res, 2**res)):
            if res == resolution_log2:
                x = E_fromrgb(x, y, res)
            x = E_block(x, res, E_features)

    # Final layers.
    with tf.variable_scope('E_4x4'):
        with tf.variable_scope('Conv'):
            x = apply_bias_act(conv2d_layer(x, fmaps=nf(1), kernel=3), act=act)
        E_features[2] = x
        with tf.variable_scope('Dense0'):
            x = apply_bias_act(dense_layer(x, fmaps=nf(1) * 2), act=act)
        # if is_training:
        x = tf.nn.dropout(x, dropout_rate)
    x_global = x
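    # x_global is the encoder's global code; layer() and torgb() below concatenate
    # it with the per-layer style vector to co-modulate the synthesis convolutions.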

    # Single convolution layer with all the bells and whistles.
    def layer(x, layer_idx, fmaps, kernel, up=False):
        mod_vector = []
        if style_mod:
            mod_vector.append(dlatents_in[:, layer_idx])
        if cond_mod:
            mod_vector.append(x_global)
        x = modulated_conv2d_layer(
            x,
            tf.concat(mod_vector, axis=1) if mod_vector else None,
            fmaps=fmaps,
            kernel=kernel,
            up=up,
            resample_kernel=resample_kernel,
            fused_modconv=fused_modconv)
        if noise_injection:
            if randomize_noise:
                noise = tf.random_normal(
                    [tf.shape(x)[0], 1, x.shape[2], x.shape[3]], dtype=x.dtype)
            else:
                noise = tf.cast(noise_inputs[layer_idx], x.dtype)
            noise_strength = tf.get_variable(
                'noise_strength',
                shape=[],
                initializer=tf.initializers.zeros())
            x += noise * tf.cast(noise_strength, x.dtype)
        return apply_bias_act(x, act=act)

    # Building blocks for main layers.
    def block(x, res, E_features):  # res = 3..resolution_log2
        x_skip = E_features[res]
        t = x
        with tf.variable_scope('Conv0_up'):
            x = layer(x,
                      layer_idx=res * 2 - 5,
                      fmaps=nf(res - 1),
                      kernel=3,
                      up=True)
        x = x + x_skip
        with tf.variable_scope('Conv1'):
            x = layer(x, layer_idx=res * 2 - 4, fmaps=nf(res - 1), kernel=3)
        if architecture == 'resnet':
            with tf.variable_scope('Skip'):
                t = conv2d_layer(t,
                                 fmaps=nf(res - 1),
                                 kernel=1,
                                 up=True,
                                 resample_kernel=resample_kernel)
                x = (x + t) * (1 / np.sqrt(2))
        return x

    def upsample(y):
        with tf.variable_scope('Upsample'):
            return upsample_2d(y, k=resample_kernel)

    def torgb(x, y, res):  # res = 2..resolution_log2
        mod_vector = []
        if style_mod:
            mod_vector.append(dlatents_in[:, res * 2 - 3])
        if cond_mod:
            mod_vector.append(x_global)
        with tf.variable_scope('ToRGB'):
            t = apply_bias_act(
                modulated_conv2d_layer(
                    x,
                    tf.concat(mod_vector, axis=1) if mod_vector else None,
                    fmaps=num_channels,
                    kernel=1,
                    demodulate=False,
                    fused_modconv=fused_modconv))
            return t if y is None else y + t

    # Early layers.
    y = None
    with tf.variable_scope('G_4x4'):
        with tf.variable_scope('Dense'):
            x = apply_bias_act(dense_layer(x, fmaps=nf(1) * 4 * 4), act=act)
        x = tf.reshape(x, [-1, nf(1), 4, 4])
        x = x + E_features[2]
        with tf.variable_scope('Conv'):
            x = layer(x, layer_idx=0, fmaps=nf(1), kernel=3)
        if architecture == 'skip':
            y = torgb(x, y, 2)

    # Main layers.
    for res in range(3, resolution_log2 + 1):
        with tf.variable_scope('G_%dx%d' % (2**res, 2**res)):
            x = block(x, res, E_features)
            if architecture == 'skip':
                y = upsample(y)
            if architecture == 'skip' or res == resolution_log2:
                y = torgb(x, y, res)
    if pix2pix:
        images_out = tf.concat([y, cond], axis=1)
    else:
        images_out = y * (1 - masks_in) + images_in * masks_in

    assert images_out.dtype == tf.as_dtype(dtype)
    return tf.identity(images_out, name='images_out')
    def build(self):

        # Setup input placeholders
        self._set_up_input_pls()

        # Setup feature extractors
        self._set_up_feature_extractors()

        bev_proposal_input = self.bev_bottleneck
        img_proposal_input = self.img_bottleneck

        fusion_mean_div_factor = 2.0

        # If both img and bev probabilities are set to 1.0, don't do
        # path drop.
        if not (self._path_drop_probabilities[0] ==
                self._path_drop_probabilities[1] == 1.0):
            with tf.variable_scope('rpn_path_drop'):

                random_values = tf.random_uniform(shape=[3],
                                                  minval=0.0,
                                                  maxval=1.0)

                img_mask, bev_mask = self.create_path_drop_masks(
                    self._path_drop_probabilities[0],
                    self._path_drop_probabilities[1], random_values)

                img_proposal_input = tf.multiply(img_proposal_input, img_mask)

                bev_proposal_input = tf.multiply(bev_proposal_input, bev_mask)

                self.img_path_drop_mask = img_mask
                self.bev_path_drop_mask = bev_mask

                # Overwrite the division factor
                fusion_mean_div_factor = img_mask + bev_mask
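                # When one modality is dropped its mask is 0, so dividing the summed
                # features by (img_mask + bev_mask) keeps the surviving path at full
                # scale instead of halving it in the mean fusion below.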

        with tf.variable_scope('proposal_roi_pooling'):

            with tf.variable_scope('box_indices'):

                def get_box_indices(boxes):
                    proposals_shape = boxes.get_shape().as_list()
                    if any(dim is None for dim in proposals_shape):
                        proposals_shape = tf.shape(boxes)
                    ones_mat = tf.ones(proposals_shape[:2], dtype=tf.int32)
                    multiplier = tf.expand_dims(
                        tf.range(start=0, limit=proposals_shape[0]), 1)
                    return tf.reshape(ones_mat * multiplier, [-1])

                bev_boxes_norm_batches = tf.expand_dims(
                    self._bev_anchors_norm_pl, axis=0)

                # These should be all 0's since there is only 1 image
                tf_box_indices = get_box_indices(bev_boxes_norm_batches)

            # Do ROI Pooling on BEV
            bev_proposal_rois = tf.image.crop_and_resize(
                bev_proposal_input, self._bev_anchors_norm_pl, tf_box_indices,
                self._proposal_roi_crop_size)
            # Do ROI Pooling on image
            img_proposal_rois = tf.image.crop_and_resize(
                img_proposal_input, self._img_anchors_norm_pl, tf_box_indices,
                self._proposal_roi_crop_size)

            print("img_proposal_rois shape")
            # print(img_proposal_rois.shape)
            # for i in range(img_proposal_rois.shape[0]):
            # print(img_proposal_rois[i])
        ####################################################################################
        # TODO PROJECT: insert code here to add mixture of experts
        # self._moe_model = MoeModel(img_proposal_input, bev_proposal_input)
        # self._moe_model._set_up_input_pls()
        # moe_prediction = self._moe_model.build()

        ####################################################################################
        with tf.variable_scope('proposal_roi_fusion'):
            rpn_fusion_out = None
            ####################################################################################
            # TODO PROJECT: weight the feature before average img and bev
            # weighted_img_proposal_rois = tf.multiply(moe_prediction['img_weight'],img_proposal_rois)
            # weighted_bev_proposal_rois = tf.multiply(moe_prediction['bev_weight'],bev_proposal_rois)
            ####################################################################################
            if self._fusion_method == 'mean':
                tf_features_sum = tf.add(bev_proposal_rois, img_proposal_rois)
                rpn_fusion_out = tf.divide(tf_features_sum,
                                           fusion_mean_div_factor)

                ####################################################################################
                # TODO PROJECT: weight the feature before average img and bev
                # tf_features_sum = tf.add(weighted_bev_proposal_rois, weighted_img_proposal_rois)
                # rpn_fusion_out = tf.divide(tf_features_sum, fusion_mean_div_factor)
                ####################################################################################
            elif self._fusion_method == 'concat':
                rpn_fusion_out = tf.concat(
                    [bev_proposal_rois, img_proposal_rois], axis=3)

                ####################################################################################
                # TODO PROJECT: weight the feature before concatenation
                # rpn_fusion_out = tf.concat(
                # [weighted_bev_proposal_rois, weighted_img_proposal_rois], axis=3)
                ####################################################################################
            else:
                raise ValueError('Invalid fusion method', self._fusion_method)

        # TODO: move this section into a separate AnchorPredictor class
        with tf.variable_scope('anchor_predictor', 'ap', [rpn_fusion_out]):
            tensor_in = rpn_fusion_out

            # Parse rpn layers config
            layers_config = self._config.layers_config.rpn_config
            l2_weight_decay = layers_config.l2_weight_decay

            if l2_weight_decay > 0:
                weights_regularizer = slim.l2_regularizer(l2_weight_decay)
            else:
                weights_regularizer = None

            with slim.arg_scope([slim.conv2d],
                                weights_regularizer=weights_regularizer):
                # Use conv2d instead of fully_connected layers.
                cls_fc6 = slim.conv2d(tensor_in,
                                      layers_config.cls_fc6,
                                      self._proposal_roi_crop_size,
                                      padding='VALID',
                                      scope='cls_fc6')

                cls_fc6_drop = slim.dropout(cls_fc6,
                                            layers_config.keep_prob,
                                            is_training=self._is_training,
                                            scope='cls_fc6_drop')

                cls_fc7 = slim.conv2d(cls_fc6_drop,
                                      layers_config.cls_fc7, [1, 1],
                                      scope='cls_fc7')

                cls_fc7_drop = slim.dropout(cls_fc7,
                                            layers_config.keep_prob,
                                            is_training=self._is_training,
                                            scope='cls_fc7_drop')

                cls_fc8 = slim.conv2d(cls_fc7_drop,
                                      2, [1, 1],
                                      activation_fn=None,
                                      scope='cls_fc8')

                objectness = tf.squeeze(cls_fc8, [1, 2],
                                        name='cls_fc8/squeezed')

                # Use conv2d instead of fully_connected layers.
                reg_fc6 = slim.conv2d(tensor_in,
                                      layers_config.reg_fc6,
                                      self._proposal_roi_crop_size,
                                      padding='VALID',
                                      scope='reg_fc6')

                reg_fc6_drop = slim.dropout(reg_fc6,
                                            layers_config.keep_prob,
                                            is_training=self._is_training,
                                            scope='reg_fc6_drop')

                reg_fc7 = slim.conv2d(reg_fc6_drop,
                                      layers_config.reg_fc7, [1, 1],
                                      scope='reg_fc7')

                reg_fc7_drop = slim.dropout(reg_fc7,
                                            layers_config.keep_prob,
                                            is_training=self._is_training,
                                            scope='reg_fc7_drop')

                reg_fc8 = slim.conv2d(reg_fc7_drop,
                                      6, [1, 1],
                                      activation_fn=None,
                                      scope='reg_fc8')

                offsets = tf.squeeze(reg_fc8, [1, 2], name='reg_fc8/squeezed')

        # Histogram summaries
        with tf.variable_scope('histograms_feature_extractor'):
            with tf.variable_scope('bev_vgg'):
                for end_point in self.bev_end_points:
                    tf.summary.histogram(end_point,
                                         self.bev_end_points[end_point])

            with tf.variable_scope('img_vgg'):
                for end_point in self.img_end_points:
                    tf.summary.histogram(end_point,
                                         self.img_end_points[end_point])

        with tf.variable_scope('histograms_rpn'):
            with tf.variable_scope('anchor_predictor'):
                fc_layers = [
                    cls_fc6, cls_fc7, cls_fc8, objectness, reg_fc6, reg_fc7,
                    reg_fc8, offsets
                ]
                for fc_layer in fc_layers:
                    # fix the name to avoid tf warnings
                    tf.summary.histogram(fc_layer.name.replace(':', '_'),
                                         fc_layer)

        # Return the proposals
        with tf.variable_scope('proposals'):
            anchors = self.placeholders[self.PL_ANCHORS]

            # Decode anchor regression offsets
            with tf.variable_scope('decoding'):
                regressed_anchors = anchor_encoder.offset_to_anchor(
                    anchors, offsets)

            with tf.variable_scope('bev_projection'):
                _, bev_proposal_boxes_norm = anchor_projector.project_to_bev(
                    regressed_anchors, self._bev_extents)

            with tf.variable_scope('softmax'):
                objectness_softmax = tf.nn.softmax(objectness)

            with tf.variable_scope('nms'):
                objectness_scores = objectness_softmax[:, 1]

                # Do NMS on regressed anchors
                top_indices = tf.image.non_max_suppression(
                    bev_proposal_boxes_norm,
                    objectness_scores,
                    max_output_size=self._nms_size,
                    iou_threshold=self._nms_iou_thresh)

                top_anchors = tf.gather(regressed_anchors, top_indices)
                top_objectness_softmax = tf.gather(objectness_scores,
                                                   top_indices)
                # top_offsets = tf.gather(offsets, top_indices)
                # top_objectness = tf.gather(objectness, top_indices)

        # Get mini batch
        all_ious_gt = self.placeholders[self.PL_ANCHOR_IOUS]
        all_offsets_gt = self.placeholders[self.PL_ANCHOR_OFFSETS]
        all_classes_gt = self.placeholders[self.PL_ANCHOR_CLASSES]

        with tf.variable_scope('mini_batch'):
            mini_batch_utils = self.dataset.kitti_utils.mini_batch_utils
            mini_batch_mask, _ = \
                mini_batch_utils.sample_rpn_mini_batch(all_ious_gt)

        # ROI summary images
        rpn_mini_batch_size = \
            self.dataset.kitti_utils.mini_batch_utils.rpn_mini_batch_size
        with tf.variable_scope('bev_rpn_rois'):
            mb_bev_anchors_norm = tf.boolean_mask(self._bev_anchors_norm_pl,
                                                  mini_batch_mask)
            mb_bev_box_indices = tf.zeros_like(tf.boolean_mask(
                all_classes_gt, mini_batch_mask),
                                               dtype=tf.int32)

            # Show the ROIs of the BEV input density map
            # for the mini batch anchors
            bev_input_rois = tf.image.crop_and_resize(self._bev_preprocessed,
                                                      mb_bev_anchors_norm,
                                                      mb_bev_box_indices,
                                                      (32, 32))

            bev_input_roi_summary_images = tf.split(bev_input_rois,
                                                    self._bev_depth,
                                                    axis=3)
            tf.summary.image('bev_rpn_rois',
                             bev_input_roi_summary_images[-1],
                             max_outputs=rpn_mini_batch_size)

        with tf.variable_scope('img_rpn_rois'):
            # ROIs on image input
            mb_img_anchors_norm = tf.boolean_mask(self._img_anchors_norm_pl,
                                                  mini_batch_mask)
            mb_img_box_indices = tf.zeros_like(tf.boolean_mask(
                all_classes_gt, mini_batch_mask),
                                               dtype=tf.int32)

            # Do test ROI pooling on mini batch
            img_input_rois = tf.image.crop_and_resize(self._img_preprocessed,
                                                      mb_img_anchors_norm,
                                                      mb_img_box_indices,
                                                      (32, 32))

            tf.summary.image('img_rpn_rois',
                             img_input_rois,
                             max_outputs=rpn_mini_batch_size)

        # Ground Truth Tensors
        with tf.variable_scope('one_hot_classes'):

            # Anchor classification ground truth
            # Object / Not Object
            min_pos_iou = \
                self.dataset.kitti_utils.mini_batch_utils.rpn_pos_iou_range[0]

            objectness_classes_gt = tf.cast(tf.greater_equal(
                all_ious_gt, min_pos_iou),
                                            dtype=tf.int32)
            objectness_gt = tf.one_hot(
                objectness_classes_gt,
                depth=2,
                on_value=1.0 - self._config.label_smoothing_epsilon,
                off_value=self._config.label_smoothing_epsilon)

        # Mask predictions for mini batch
        with tf.variable_scope('prediction_mini_batch'):
            objectness_masked = tf.boolean_mask(objectness, mini_batch_mask)
            offsets_masked = tf.boolean_mask(offsets, mini_batch_mask)

        with tf.variable_scope('ground_truth_mini_batch'):
            objectness_gt_masked = tf.boolean_mask(objectness_gt,
                                                   mini_batch_mask)
            offsets_gt_masked = tf.boolean_mask(all_offsets_gt,
                                                mini_batch_mask)

        # Specify the tensors to evaluate
        predictions = dict()

        # Temporary predictions for debugging
        # predictions['anchor_ious'] = anchor_ious
        # predictions['anchor_offsets'] = all_offsets_gt

        if self._train_val_test in ['train', 'val']:
            # All anchors
            predictions[self.PRED_ANCHORS] = anchors

            # Mini-batch masks
            predictions[self.PRED_MB_MASK] = mini_batch_mask
            # Mini-batch predictions
            predictions[self.PRED_MB_OBJECTNESS] = objectness_masked
            predictions[self.PRED_MB_OFFSETS] = offsets_masked

            # Mini batch ground truth
            predictions[self.PRED_MB_OFFSETS_GT] = offsets_gt_masked
            predictions[self.PRED_MB_OBJECTNESS_GT] = objectness_gt_masked

            # Proposals after nms
            predictions[self.PRED_TOP_INDICES] = top_indices
            predictions[self.PRED_TOP_ANCHORS] = top_anchors
            predictions[
                self.PRED_TOP_OBJECTNESS_SOFTMAX] = top_objectness_softmax

        else:
            # self._train_val_test == 'test'
            predictions[self.PRED_TOP_ANCHORS] = top_anchors
            predictions[
                self.PRED_TOP_OBJECTNESS_SOFTMAX] = top_objectness_softmax

        return predictions
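As a quick illustration of the NMS-then-gather pattern used in the 'nms' scope above, here is a minimal standalone sketch (toy boxes and scores, not the model's real values): tf.image.non_max_suppression returns indices, which are then gathered.

import tensorflow as tf

# Toy normalized boxes [y1, x1, y2, x2] and objectness scores (assumed values).
boxes = tf.constant([[0.0, 0.0, 0.5, 0.5],
                     [0.1, 0.1, 0.6, 0.6],
                     [0.5, 0.5, 1.0, 1.0]])
scores = tf.constant([0.9, 0.8, 0.7])

# Keep at most 2 boxes, suppressing overlaps above the IoU threshold.
keep = tf.image.non_max_suppression(boxes, scores,
                                    max_output_size=2,
                                    iou_threshold=0.5)
top_boxes = tf.gather(boxes, keep)       # selected proposals
top_scores = tf.gather(scores, keep)     # their scores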
Example #39
0
    def __init__(self,
                 embedding_mat,
                 non_static,
                 hidden_unit,
                 sequence_length,
                 max_pool_size,
                 num_classes,
                 embedding_size,
                 filter_sizes,
                 num_filters,
                 l2_reg_lambda=0.0):

        self.input_x = tf.placeholder(tf.int32, [None, sequence_length],
                                      name='input_x')
        self.input_y = tf.placeholder(tf.float32, [None, num_classes],
                                      name='input_y')
        self.dropout_keep_prob = tf.placeholder(tf.float32,
                                                name='dropout_keep_prob')
        self.batch_size = tf.placeholder(tf.int32)
        self.pad = tf.placeholder(tf.float32, [None, 1, embedding_size, 1],
                                  name='pad')
        self.real_len = tf.placeholder(tf.int32, [None], name='real_len')

        l2_loss = tf.constant(0.0)

        with tf.device('/cpu:0'), tf.name_scope('embedding'):
            if not non_static:
                W = tf.constant(embedding_mat, name='W')
            else:
                W = tf.Variable(embedding_mat, name='W')
            self.embedded_chars = tf.nn.embedding_lookup(W, self.input_x)
        #    emb = tf.expand_dims(self.embedded_chars, -1)
        reduced = sequence_length

        lstm_cell = tf.nn.rnn_cell.LSTMCell(num_units=int(hidden_unit / 2))
        lstm_cell = tf.nn.rnn_cell.DropoutWrapper(
            lstm_cell, output_keep_prob=self.dropout_keep_prob)
        lstm_bw_cell = tf.nn.rnn_cell.LSTMCell(num_units=int(hidden_unit / 2))
        lstm_bw_cell = tf.nn.rnn_cell.DropoutWrapper(
            lstm_bw_cell, output_keep_prob=self.dropout_keep_prob)
        self._initial_state = lstm_cell.zero_state(self.batch_size, tf.float32)
        self._initial_state_bw = lstm_bw_cell.zero_state(
            self.batch_size, tf.float32)
        inputs = [
            tf.squeeze(input_, [1])
            for input_ in tf.split(1, reduced, self.embedded_chars)
        ]
        outputs, _, _ = tf.nn.bidirectional_rnn(
            lstm_cell,
            lstm_bw_cell,
            inputs,
            initial_state_fw=self._initial_state,
            initial_state_bw=self._initial_state_bw,
            sequence_length=self.real_len)

        # Collect the appropriate last outputs into the variable output (dimension = batch x hidden_unit)
        output = outputs[0]
        with tf.variable_scope('Output'):
            tf.get_variable_scope().reuse_variables()
            one = tf.ones([1, hidden_unit], tf.float32)
            for i in range(1, len(outputs)):
                ind = self.real_len < (i + 1)
                ind = tf.to_float(ind)
                ind = tf.expand_dims(ind, -1)
                mat = tf.matmul(ind, one)
                output = tf.add(tf.mul(output, mat),
                                tf.mul(outputs[i], 1.0 - mat))

        with tf.name_scope('output'):
            self.W = tf.Variable(tf.truncated_normal(
                [hidden_unit, num_classes], stddev=0.1),
                                 name='W')
            b = tf.Variable(tf.constant(0.1, shape=[num_classes]), name='b')
            l2_loss += tf.nn.l2_loss(self.W)
            l2_loss += tf.nn.l2_loss(b)
            self.scores = tf.nn.xw_plus_b(output, self.W, b, name='scores')
            self.predictions = tf.argmax(self.scores, 1, name='predictions')

        with tf.name_scope('loss'):
            losses = tf.nn.softmax_cross_entropy_with_logits(
                self.scores, self.input_y)
            self.loss = tf.reduce_mean(losses) + l2_reg_lambda * l2_loss

        with tf.name_scope('accuracy'):
            correct_predictions = tf.equal(self.predictions,
                                           tf.argmax(self.input_y, 1))
            self.accuracy = tf.reduce_mean(tf.cast(correct_predictions,
                                                   "float"),
                                           name='accuracy')

        with tf.name_scope('num_correct'):
            correct = tf.equal(self.predictions, tf.argmax(self.input_y, 1))
            self.num_correct = tf.reduce_sum(tf.cast(correct, 'float'))
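Note that this example uses the pre-1.0 argument order tf.split(axis, num_splits, value) together with tf.nn.bidirectional_rnn. A rough sketch of the equivalent per-timestep split under the TF 1.x+ signature (toy shapes, not the model's real dimensions):

import tensorflow as tf

embedded = tf.zeros([8, 20, 128])              # [batch, sequence_length, embedding_size] (toy values)

# Pre-1.0 style used above:  tf.split(1, 20, embedded)
pieces = tf.split(embedded, num_or_size_splits=20, axis=1)
inputs = [tf.squeeze(p, [1]) for p in pieces]  # 20 tensors of shape [8, 128]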
Example #40
0
    def __call__(self, x, prev_state):
        prev_read_vector_list = prev_state.read_vector_list

        controller_input = tf.concat([x] + prev_read_vector_list, axis=1)
        with tf.compat.v1.variable_scope('controller', reuse=self.reuse):
            controller_output, controller_state = self.controller(controller_input, prev_state.controller_state)

        num_parameters_per_head = self.memory_vector_dim + 1 + 1 + (self.shift_range * 2 + 1) + 1
        num_heads = self.read_head_num + self.write_head_num
        total_parameter_num = num_parameters_per_head * num_heads + self.memory_vector_dim * 2 * self.write_head_num
        with tf.compat.v1.variable_scope("o2p", reuse=(self.step > 0) or self.reuse):
            parameters = tf.compat.v1.layers.dense(
                controller_output, total_parameter_num, activation=None,
                kernel_initializer=self.o2p_initializer)
            parameters = tf.clip_by_value(parameters, -self.clip_value, self.clip_value)
        head_parameter_list = tf.split(parameters[:, :num_parameters_per_head * num_heads], num_heads, axis=1)
        erase_add_list = tf.split(parameters[:, num_parameters_per_head * num_heads:], 2 * self.write_head_num, axis=1)

        prev_w_list = prev_state.w_list
        prev_M = prev_state.M
        w_list = []
        for i, head_parameter in enumerate(head_parameter_list):
            k = tf.tanh(head_parameter[:, 0:self.memory_vector_dim])
            beta = tf.nn.softplus(head_parameter[:, self.memory_vector_dim])
            g = tf.sigmoid(head_parameter[:, self.memory_vector_dim + 1])
            s = tf.nn.softmax(
                head_parameter[:, self.memory_vector_dim + 2:self.memory_vector_dim + 2 + (self.shift_range * 2 + 1)]
            )
            gamma = tf.nn.softplus(head_parameter[:, -1]) + 1
            with tf.compat.v1.variable_scope('addressing_head_%d' % i):
                w = self.addressing(k, beta, g, s, gamma, prev_M, prev_w_list[i])
            w_list.append(w)

        # Reading (Sec 3.1)

        read_w_list = w_list[:self.read_head_num]
        read_vector_list = []
        for i in range(self.read_head_num):
            read_vector = tf.reduce_sum(tf.expand_dims(read_w_list[i], axis=2) * prev_M, axis=1)
            read_vector_list.append(read_vector)

        # Writing (Sec 3.2)

        write_w_list = w_list[self.read_head_num:]
        M = prev_M
        for i in range(self.write_head_num):
            w = tf.expand_dims(write_w_list[i], axis=2)
            erase_vector = tf.expand_dims(tf.sigmoid(erase_add_list[i * 2]), axis=1)
            add_vector = tf.expand_dims(tf.tanh(erase_add_list[i * 2 + 1]), axis=1)
            M = M * (tf.ones(M.get_shape()) - tf.matmul(w, erase_vector)) + tf.matmul(w, add_vector)

        if not self.output_dim:
            output_dim = x.get_shape()[1]
        else:
            output_dim = self.output_dim
        with tf.compat.v1.variable_scope("o2o", reuse=(self.step > 0) or self.reuse):
            NTM_output = tf.compat.v1.layers.dense(
                tf.concat([controller_output] + read_vector_list, axis=1), output_dim, activation=None,
                kernel_initializer=self.o2o_initializer)
            NTM_output = tf.clip_by_value(NTM_output, -self.clip_value, self.clip_value)

        self.step += 1
        return NTM_output, NTMControllerState(
            controller_state=controller_state, read_vector_list=read_vector_list, w_list=w_list, M=M)
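The write step above is an outer-product erase/add update on the memory matrix. A standalone sketch of the shapes involved (assumed batch of 2, 4 memory slots, memory vector size 3, toy values):

import tensorflow as tf

w = tf.fill([2, 4, 1], 0.25)     # write weights,  [batch, memory_slots, 1]
erase = tf.fill([2, 1, 3], 0.5)  # erase vector,   [batch, 1, memory_vector_dim]
add = tf.ones([2, 1, 3])         # add vector,     [batch, 1, memory_vector_dim]
M = tf.ones([2, 4, 3])           # memory,         [batch, memory_slots, memory_vector_dim]

# M <- M * (1 - w e^T) + w a^T, applied per batch element via batched matmul.
M_new = M * (tf.ones_like(M) - tf.matmul(w, erase)) + tf.matmul(w, add)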
Example #41
0
File: lexicalize.py Project: ml-lab/tgen
    def _init_neural_network(self):
        """Initialize the RNNLM network."""

        with tf.variable_scope(self.scope_name):
            # TODO dropout
            # I/O placeholders
            self._inputs = tf.placeholder(tf.int32, [None, self.max_sent_len],
                                          name='inputs')
            self._targets = tf.placeholder(tf.int32, [None, self.max_sent_len],
                                           name='targets')

            # RNN cell type
            if self.cell_type.startswith('gru'):
                self._cell = tf.nn.rnn_cell.GRUCell(self.emb_size)
            else:
                self._cell = tf.nn.rnn_cell.BasicLSTMCell(self.emb_size)
            # use re.search so cell types such as 'lstm/2' match the '/<layers>' suffix
            if re.search(r'/[0-9]$', self.cell_type):
                self._cell = tf.nn.rnn_cell.MultiRNNCell(
                    [self._cell] * int(self.cell_type[-1]))
            self._initial_state = self._cell.zero_state(
                tf.shape(self._inputs)[0], tf.float32)

            # embeddings
            emb_cell = tf.nn.rnn_cell.EmbeddingWrapper(self._cell,
                                                       self.vocab_size)
            # RNN encoder
            inputs = [
                tf.squeeze(input_, [1])
                for input_ in tf.split(1, self.max_sent_len, self._inputs)
            ]
            outputs, states = tf.nn.rnn(emb_cell,
                                        inputs,
                                        initial_state=self._initial_state)

            # output layer
            output = tf.reshape(tf.concat(1, outputs), [-1, self.emb_size])
            self._logits = (tf.matmul(
                output, tf.get_variable("W", [self.emb_size, self.vocab_size]))
                            + tf.get_variable("b", [self.vocab_size]))

            # cost
            targets_1d = tf.reshape(self._targets, [-1])
            self._loss = tf.nn.seq2seq.sequence_loss_by_example(
                [self._logits], [targets_1d],
                [tf.ones_like(targets_1d, dtype=tf.float32)], self.vocab_size)
            self._cost = tf.reduce_mean(self._loss)

            # optimizer
            self._learning_rate = tf.placeholder(tf.float32,
                                                 name="learning_rate")
            if self.optimizer_type == 'sgd':
                opt = tf.train.GradientDescentOptimizer(self._learning_rate)
            elif self.optimizer_type == 'adagrad':
                opt = tf.train.AdagradOptimizer(self._learning_rate)
            else:
                opt = tf.train.AdamOptimizer(self._learning_rate)

            # gradient clipping
            grads_tvars = opt.compute_gradients(self._loss,
                                                tf.trainable_variables())
            grads, _ = tf.clip_by_global_norm([g for g, _ in grads_tvars],
                                              self.max_grad_norm)
            self._train_func = opt.apply_gradients(
                zip(grads, [v for _, v in grads_tvars]))

        # initialize TF session
        session_config = None
        if self.max_cores:
            session_config = tf.ConfigProto(
                inter_op_parallelism_threads=self.max_cores,
                intra_op_parallelism_threads=self.max_cores)
        self.session = tf.Session(config=session_config)
Example #42
0
    def _set_up_input_pls(self):
        """Sets up input placeholders by adding them to self._placeholders.
        Keys are defined as self.PL_*.
        """
        # Combine config data
        bev_dims = np.append(self._bev_pixel_size, self._bev_depth)

        with tf.variable_scope('bev_input'):
            # Placeholder for BEV image input, to be filled in with feed_dict
            bev_input_placeholder = self._add_placeholder(
                tf.float32, bev_dims, self.PL_BEV_INPUT)

            self._bev_input_batches = tf.expand_dims(bev_input_placeholder,
                                                     axis=0)

            self._bev_preprocessed = \
                self._bev_feature_extractor.preprocess_input(
                    self._bev_input_batches, self._bev_pixel_size)

            # Summary Images
            bev_summary_images = tf.split(bev_input_placeholder,
                                          self._bev_depth,
                                          axis=2)
            tf.summary.image("bev_maps",
                             bev_summary_images,
                             max_outputs=self._bev_depth)

        with tf.variable_scope('img_input'):
            # Take variable size input images
            img_input_placeholder = self._add_placeholder(
                tf.float32, [None, None, self._img_depth], self.PL_IMG_INPUT)

            self._img_input_batches = tf.expand_dims(img_input_placeholder,
                                                     axis=0)

            self._img_preprocessed = \
                self._img_feature_extractor.preprocess_input(
                    self._img_input_batches, self._img_pixel_size)

            # Summary Image
            tf.summary.image("rgb_image",
                             self._img_preprocessed,
                             max_outputs=2)

        with tf.variable_scope('pl_labels'):
            self._add_placeholder(tf.float32, [None, 6], self.PL_LABEL_ANCHORS)
            self._add_placeholder(tf.float32, [None, 7],
                                  self.PL_LABEL_BOXES_3D)
            self._add_placeholder(tf.float32, [None], self.PL_LABEL_CLASSES)

        # Placeholders for anchors
        with tf.variable_scope('pl_anchors'):
            self._add_placeholder(tf.float32, [None, 6], self.PL_ANCHORS)
            self._add_placeholder(tf.float32, [None], self.PL_ANCHOR_IOUS)
            self._add_placeholder(tf.float32, [None, 6],
                                  self.PL_ANCHOR_OFFSETS)
            self._add_placeholder(tf.float32, [None], self.PL_ANCHOR_CLASSES)

            with tf.variable_scope('bev_anchor_projections'):
                self._add_placeholder(tf.float32, [None, 4],
                                      self.PL_BEV_ANCHORS)
                self._bev_anchors_norm_pl = self._add_placeholder(
                    tf.float32, [None, 4], self.PL_BEV_ANCHORS_NORM)

            with tf.variable_scope('img_anchor_projections'):
                self._add_placeholder(tf.float32, [None, 4],
                                      self.PL_IMG_ANCHORS)
                self._img_anchors_norm_pl = self._add_placeholder(
                    tf.float32, [None, 4], self.PL_IMG_ANCHORS_NORM)

            with tf.variable_scope('sample_info'):
                # the calib matrix shape is (3 x 4)
                self._add_placeholder(tf.float32, [3, 4], self.PL_CALIB_P2)
                self._add_placeholder(tf.int32,
                                      shape=[1],
                                      name=self.PL_IMG_IDX)
                self._add_placeholder(tf.float32, [4], self.PL_GROUND_PLANE)
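As an aside, splitting a depth-stacked BEV map into per-channel summary images, as in the 'bev_input' scope above, can be sketched on its own (TF 1.x-style summaries, toy shape assumed; the code above passes the list directly and relies on implicit conversion, here the stack is explicit):

import tensorflow as tf

bev = tf.zeros([700, 800, 6])    # [height, width, depth] (toy values)
maps = tf.split(bev, 6, axis=2)  # list of six [700, 800, 1] slices
images = tf.stack(maps)          # [6, 700, 800, 1], a valid 4-D input for tf.summary.image
tf.summary.image('bev_maps', images, max_outputs=6)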
Example #43
0
    def __init__(self, is_training, args):

        self.batch_size = batch_size = np.int32(args['max_group_size'])
        self.num_steps = num_steps = np.int32(args['num_steps'])
        self.num_features = num_features = args['num_features']
        self.dense_units = dense_units = 1
        self.hidden_size = size = np.int32(args['hidden_size'])

        if self.num_steps < 1:
            print('num_steps cannot be less than 1 -- setting to 1')
            self.num_steps = num_steps = 1

        if self.hidden_size < 10:
            print('hidden_size should not be less than 10 -- setting to 10')
            self.hidden_size = size = 10

        if is_training:
            print('Initiating input tensors of shape: {}'.format(
                (num_steps, batch_size, num_features)))

        self._input_data = inputs = tf.placeholder(
            tf.float32, [num_steps, batch_size, num_features])
        self._targets = tf.placeholder(tf.float32, [num_steps, batch_size])

        # Memory cell to use in model
        lstm_cell = tf.nn.rnn_cell.BasicLSTMCell(num_units=size,
                                                 forget_bias=0.0,
                                                 state_is_tuple=True)

        if is_training:
            print('Memory Cell: {}'.format(type(lstm_cell)))

        # Wrap the memory cell in a dropout layer (for outputs)
        if is_training and args['keep_prob'] < 1:
            lstm_cell = tf.nn.rnn_cell.DropoutWrapper(
                cell=lstm_cell, output_keep_prob=args['keep_prob'])

        # Create the RNN with 'num_layers' layers
        stacked_cell = tf.nn.rnn_cell.MultiRNNCell([lstm_cell] *
                                                   args['num_layers'],
                                                   state_is_tuple=True)

        # Initialize the state -- it will hold the last output, h_t, as well as the memory state, c_t
        # Shape will be [batch_size, num_units x 2] -- splitting on dimension 1 will separate output and memory
        self._initial_state = stacked_cell.zero_state(
            batch_size=tf.constant(batch_size), dtype=tf.float32)

        # Split the inputs (by timestep)
        inputs = [
            tf.squeeze(input, [0]) for input in tf.split(0, num_steps, inputs)
        ]

        # Computes dropout for inputs
        if is_training and args['keep_prob'] < 1:
            inputs = [tf.nn.dropout(x, args['keep_prob']) for x in inputs]

        # Run inputs through the RNN
        outputs, state = tf.nn.rnn(stacked_cell,
                                   inputs,
                                   initial_state=self._initial_state)

        # Re-joins all output tensors (from each timestep)
        output = tf.reshape(tf.concat(1, outputs), shape=[-1, size])

        # Add a fully-connected layer
        self.dense_w = dense_w = tf.get_variable('dense_w',
                                                 shape=[size, dense_units])
        self.dense_b = dense_b = tf.get_variable('dense_b',
                                                 shape=[dense_units])

        # Feed the output from the RNN to the fully-connected layer
        self._predictions = predictions = tf.matmul(output, dense_w) + dense_b
        self._predictions = predictions = tf.reshape(
            self.predictions, shape=[num_steps, batch_size])

        # Compute the R^2
        numerator = tf.reduce_sum(
            tf.square(tf.sub(self.targets, self.predictions)))
        denominator = tf.reduce_sum(
            tf.square(tf.sub(self.targets, tf.reduce_mean(self.targets))))
        self.r2 = r2 = tf.sub(1.0, tf.div(numerator, denominator))

        # MSE cost function
        self._cost = cost = tf.reduce_mean(
            tf.square(tf.sub(self.targets, self.predictions)))
        self._final_state = state

        # Variable for state (for when saving model)
        self.save_state = tf.Variable(
            tf.zeros([args['num_layers'], 2, batch_size, size]))
        self.save_state.assign(state)

        if is_training:

            self._lr = tf.Variable(0.0, trainable=False)

            # Compute the gradients
            tvars = tf.trainable_variables()
            grads, _ = tf.clip_by_global_norm(t_list=tf.gradients(cost, tvars),
                                              clip_norm=args['max_grad_norm'])

            # Adjust the parameters based on optimizer and gradients
            optimizer = args['optimizer'](self.lr)
            self._train_op = optimizer.apply_gradients(
                grads_and_vars=zip(grads, tvars))

            # Summaries for Tensorboard
            cost_summ = tf.scalar_summary('mean squared error', cost)
            r2_summ = tf.scalar_summary('r-squared', r2)
            state_summ = tf.histogram_summary('states', state)
            pred_summ = tf.histogram_summary('predictions', predictions)
            self.summary = tf.merge_all_summaries()

        else:
            # Ignore this -- put here so errors are prevented when running model not in training mode
            self.summary = predictions

        return
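The R² above is the usual 1 - SS_res / SS_tot. In current op names (tf.sub was renamed tf.subtract in TF 1.0, and tf.divide is the current name for division) a toy version looks like:

import tensorflow as tf

targets = tf.constant([1.0, 2.0, 3.0])   # assumed values
preds = tf.constant([1.1, 1.9, 3.2])

ss_res = tf.reduce_sum(tf.square(tf.subtract(targets, preds)))
ss_tot = tf.reduce_sum(tf.square(tf.subtract(targets, tf.reduce_mean(targets))))
r2 = tf.subtract(1.0, tf.divide(ss_res, ss_tot))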
Example #44
0
File: train.py Project: huluwa-yt8m/yt8m
def build_graph(reader,
                model,
                train_data_pattern,
                label_loss_fn=losses.CrossEntropyLoss(),
                batch_size=1000,
                base_learning_rate=0.01,
                learning_rate_decay_examples=1000000,
                learning_rate_decay=0.95,
                optimizer_class=tf.train.AdamOptimizer,
                clip_gradient_norm=1.0,
                regularization_penalty=1,
                num_readers=1,
                num_epochs=None):
    """Creates the Tensorflow graph.

  This will only be called once in the life of
  a training model, because after the graph is created the model will be
  restored from a meta graph file rather than being recreated.

  Args:
    reader: The data file reader. It should inherit from BaseReader.
    model: The core model (e.g. logistic or neural net). It should inherit from
      BaseModel.
    train_data_pattern: glob path to the training data files.
    label_loss_fn: What kind of loss to apply to the model. It should inherit
      from BaseLoss.
    batch_size: How many examples to process at a time.
    base_learning_rate: What learning rate to initialize the optimizer with.
    optimizer_class: Which optimization algorithm to use.
    clip_gradient_norm: Magnitude of the gradient to clip to.
    regularization_penalty: How much weight to give the regularization loss
      compared to the label loss.
    num_readers: How many threads to use for I/O operations.
    num_epochs: How many passes to make over the data. 'None' means an unlimited
      number of passes.
  """

    global_step = tf.Variable(0, trainable=False, name="global_step")

    local_device_protos = device_lib.list_local_devices()
    gpus = [x.name for x in local_device_protos if x.device_type == "GPU"]
    gpus = gpus[:FLAGS.num_gpu]
    num_gpus = len(gpus)

    if num_gpus > 0:
        logging.info("Using the following GPUs to train: " + str(gpus))
        num_towers = num_gpus
        device_string = "/gpu:%d"
    else:
        logging.info("No GPUs found. Training on CPU.")
        num_towers = 1
        device_string = "/cpu:%d"

    learning_rate = tf.train.exponential_decay(base_learning_rate,
                                               global_step * batch_size *
                                               num_towers,
                                               learning_rate_decay_examples,
                                               learning_rate_decay,
                                               staircase=True)
    tf.summary.scalar("learning_rate", learning_rate)

    optimizer = optimizer_class(learning_rate)
    input_data_dict = (get_input_data_tensors(reader,
                                              train_data_pattern,
                                              batch_size=batch_size *
                                              num_towers,
                                              num_readers=num_readers,
                                              num_epochs=num_epochs))
    model_input_raw = input_data_dict["video_matrix"]
    labels_batch = input_data_dict["labels"]
    num_frames = input_data_dict["num_frames"]
    print("model_input_shape, ", model_input_raw.shape)
    tf.summary.histogram("model/input_raw", model_input_raw)

    feature_dim = len(model_input_raw.get_shape()) - 1

    model_input = tf.nn.l2_normalize(model_input_raw, feature_dim)

    tower_inputs = tf.split(model_input, num_towers)
    tower_labels = tf.split(labels_batch, num_towers)
    tower_num_frames = tf.split(num_frames, num_towers)
    tower_gradients = []
    tower_predictions = []
    tower_label_losses = []
    tower_reg_losses = []
    for i in range(num_towers):
        # For some reason these 'with' statements can't be combined onto the same
        # line. They have to be nested.
        with tf.device(device_string % i):
            with (tf.variable_scope(("tower"), reuse=True if i > 0 else None)):
                with (slim.arg_scope(
                    [slim.model_variable, slim.variable],
                        device="/cpu:0" if num_gpus != 1 else "/gpu:0")):
                    result = model.create_model(tower_inputs[i],
                                                num_frames=tower_num_frames[i],
                                                vocab_size=reader.num_classes,
                                                labels=tower_labels[i])
                    for variable in slim.get_model_variables():
                        tf.summary.histogram(variable.op.name, variable)

                    predictions = result["predictions"]
                    tower_predictions.append(predictions)

                    if "loss" in result.keys():
                        label_loss = result["loss"]
                    else:
                        label_loss = label_loss_fn.calculate_loss(
                            predictions, tower_labels[i])

                    if "regularization_loss" in result.keys():
                        reg_loss = result["regularization_loss"]
                    else:
                        reg_loss = tf.constant(0.0)

                    reg_losses = tf.losses.get_regularization_losses()
                    if reg_losses:
                        reg_loss += tf.add_n(reg_losses)

                    tower_reg_losses.append(reg_loss)

                    # Adds update_ops (e.g., moving average updates in batch normalization) as
                    # a dependency to the train_op.
                    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
                    if "update_ops" in result.keys():
                        update_ops += result["update_ops"]
                    if update_ops:
                        with tf.control_dependencies(update_ops):
                            barrier = tf.no_op(name="gradient_barrier")
                            with tf.control_dependencies([barrier]):
                                label_loss = tf.identity(label_loss)

                    tower_label_losses.append(label_loss)

                    # Incorporate the L2 weight penalties etc.
                    final_loss = regularization_penalty * reg_loss + label_loss
                    gradients = optimizer.compute_gradients(
                        final_loss, colocate_gradients_with_ops=False)
                    tower_gradients.append(gradients)
    label_loss = tf.reduce_mean(tf.stack(tower_label_losses))
    tf.summary.scalar("label_loss", label_loss)
    if regularization_penalty != 0:
        reg_loss = tf.reduce_mean(tf.stack(tower_reg_losses))
        tf.summary.scalar("reg_loss", reg_loss)
    merged_gradients = utils.combine_gradients(tower_gradients)

    if clip_gradient_norm > 0:
        with tf.name_scope("clip_grads"):
            merged_gradients = utils.clip_gradient_norms(
                merged_gradients, clip_gradient_norm)

    train_op = optimizer.apply_gradients(merged_gradients,
                                         global_step=global_step)

    tf.add_to_collection("global_step", global_step)
    tf.add_to_collection("loss", label_loss)
    tf.add_to_collection("predictions", tf.concat(tower_predictions, 0))
    tf.add_to_collection("input_batch_raw", model_input_raw)
    tf.add_to_collection("input_batch", model_input)
    tf.add_to_collection("num_frames", num_frames)
    tf.add_to_collection("labels", tf.cast(labels_batch, tf.float32))
    tf.add_to_collection("train_op", train_op)
Example #45
0
    def get_q_values_op(self,
                        state,
                        past_a,
                        seq_len,
                        h_state,
                        scope,
                        reuse=False):
        """
        Returns Q values for all actions

        Args:
            state: (tf tensor) 
                shape = (batch_size, seq_len, img_w, img_h, nchannel)
            past_a: (tf tensor)
                shape = (batch_size*seq_len,)
            seq_len: (tf tensor)
                shape = (batch_size,)
            h_state: (tf tensor) 
                shape = (batch_size, h_size)
            scope: (string) scope name, that specifies if target network or not
            reuse: (bool) reuse of variables in the scope

        Returns:
            out: (tf tensor) of shape = (batch_size * seq_len, num_actions)
            h_state_out: (tf tensor) of shape = (batch_size, h_size)
        """
        num_actions = 2
        h_size = self.config.h_size
        max_seq_len = tf.shape(state)[1]
        state_shape = list(
            [4 * 3, 3, len(self.env.state.xmap.item_class_id) + 2])
        past_a = tf.reshape(tf.one_hot(past_a, num_actions),
                            shape=(-1, max_seq_len, 1, num_actions))
        past_a = tf.tile(past_a, multiples=[1, 1, 4, 1])
        out = tf.reshape(state,
                         shape=(-1, max_seq_len, 4,
                                np.int32(state_shape[0] * state_shape[1] *
                                         state_shape[2] / 4)))
        with tf.variable_scope(scope, reuse=reuse):
            #### recurrent

            out = tf.concat([out, past_a], axis=3)
            out = layers.fully_connected(layers.fully_connected(out, 200), 100)
            out = tf.reduce_max(out, axis=2)
            lstm_cell = tf.nn.rnn_cell.LSTMCell(num_units=h_size)
            out, h_state_out = tf.nn.dynamic_rnn(inputs=out,
                                                 cell=lstm_cell,
                                                 sequence_length=seq_len,
                                                 dtype=tf.float32,
                                                 initial_state=h_state)
            out = tf.reshape(out, shape=[-1, h_size])

            #### feed forward
            '''
            out = layers.fully_connected(layers.fully_connected(out, 200), 100)
            out = tf.reduce_max(out, axis=2)
            out = tf.reshape(out, shape=[-1,100])
            h_state_out = h_state
            '''

            streamA, streamV = tf.split(out, 2, axis=1)
            advantage = layers.fully_connected(
                streamA,
                num_actions,
                activation_fn=None,
                weights_initializer=layers.xavier_initializer(),
                biases_initializer=tf.zeros_initializer())
            value = layers.fully_connected(
                streamV,
                1,
                activation_fn=None,
                weights_initializer=layers.xavier_initializer(),
                biases_initializer=tf.zeros_initializer())
            out = value + tf.subtract(
                advantage, tf.reduce_mean(advantage, axis=1, keep_dims=True))
        return out, h_state_out
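The dueling-style split at the end of this function halves the LSTM features into advantage and value streams and recombines them as Q = V + (A - mean(A)); a toy sketch with assumed sizes:

import tensorflow as tf

out = tf.zeros([32, 64])                     # [batch * seq_len, h_size] (toy values)
streamA, streamV = tf.split(out, 2, axis=1)  # advantage / value streams, each [32, 32]

advantage = streamA[:, :2]                   # pretend 2 actions for illustration
value = streamV[:, :1]
q_values = value + (advantage - tf.reduce_mean(advantage, axis=1, keepdims=True))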
Example #46
0
def hsplit(x, n_splits):
    return _tf.split(x, num_or_size_splits=n_splits, axis=1)
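A quick usage sketch, assuming _tf is the usual 'import tensorflow as _tf' alias this helper relies on:

import tensorflow as _tf

x = _tf.reshape(_tf.range(12), [3, 4])
a, b = hsplit(x, 2)  # two tensors of shape [3, 2]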
Example #47
0
    def _module_fn():
        """
        Function building the module
        """

        feature_layer = tf.placeholder(
            tf.float32,
            shape=[None, None, None, None, nchannels],
            name='input')
        obs_layer = tf.placeholder(tf.float32,
                                   shape=[None, None, None, None, n_y],
                                   name='observations')

        # Builds the neural network
        net = slim.conv3d(feature_layer,
                          16,
                          5,
                          activation_fn=tf.nn.leaky_relu,
                          padding='valid')
        #net = wide_resnet(feature_layer, 8, activation_fn=tf.nn.leaky_relu, is_training=is_training)
        net = wide_resnet(net,
                          16,
                          activation_fn=tf.nn.leaky_relu,
                          keep_prob=dropout,
                          is_training=is_training)
        net = wide_resnet(net,
                          32,
                          activation_fn=tf.nn.leaky_relu,
                          keep_prob=dropout,
                          is_training=is_training)
        net = wide_resnet(net,
                          32,
                          activation_fn=tf.nn.leaky_relu,
                          keep_prob=dropout,
                          is_training=is_training)
        net = slim.conv3d(net, 32, 3, activation_fn=tf.nn.tanh)

        # Define the probabilistic layer
        net = slim.conv3d(net, n_mixture * 3 * n_y, 1, activation_fn=None)
        cube_size = tf.shape(obs_layer)[1]
        net = tf.reshape(
            net, [-1, cube_size, cube_size, cube_size, n_y, n_mixture * 3])
        #         net = tf.reshape(net, [None, None, None, None, n_y, n_mixture*3])
        loc, unconstrained_scale, logits = tf.split(net,
                                                    num_or_size_splits=3,
                                                    axis=-1)
        scale = tf.nn.softplus(unconstrained_scale)

        # Form mixture of discretized logistic distributions. Note we shift the
        # logistic distribution by -0.5. This lets the quantization capture "rounding"
        # intervals, `(x-0.5, x+0.5]`, and not "ceiling" intervals, `(x-1, x]`.
        discretized_logistic_dist = tfd.QuantizedDistribution(
            distribution=tfd.TransformedDistribution(
                distribution=tfd.Logistic(loc=loc, scale=scale),
                bijector=tfb.AffineScalar(shift=-0.5)),
            low=0.,
            high=2.**3 - 1)

        mixture_dist = tfd.MixtureSameFamily(
            mixture_distribution=tfd.Categorical(logits=logits),
            components_distribution=discretized_logistic_dist)

        # Define a function for sampling, and a function for estimating the log likelihood
        sample = tf.squeeze(mixture_dist.sample())
        loglik = mixture_dist.log_prob(obs_layer)
        hub.add_signature(inputs={
            'features': feature_layer,
            'labels': obs_layer
        },
                          outputs={
                              'sample': sample,
                              'loglikelihood': loglik
                          })
Example #48
0
    def _create_svgd_update(self):
        """Create a minimization operation for policy update (SVGD)."""

        actions = self.policy.actions_for(
            observations=self._observations_ph,
            n_action_samples=self._kernel_n_particles,
            reuse=True)
        assert_shape(actions,
                     [None, self._kernel_n_particles, self._action_dim])

        # SVGD requires computing two empirical expectations over actions
        # (see Appendix C1.1.). To that end, we first sample a single set of
        # actions, and later split them into two sets: `fixed_actions` are used
        # to evaluate the expectation indexed by `j` and `updated_actions`
        # the expectation indexed by `i`.
        n_updated_actions = int(
            self._kernel_n_particles * self._kernel_update_ratio)
        n_fixed_actions = self._kernel_n_particles - n_updated_actions

        fixed_actions, updated_actions = tf.split(
            actions, [n_fixed_actions, n_updated_actions], axis=1)
        fixed_actions = tf.stop_gradient(fixed_actions)
        assert_shape(fixed_actions, [None, n_fixed_actions, self._action_dim])
        assert_shape(updated_actions,
                     [None, n_updated_actions, self._action_dim])

        svgd_target_values = self.qf.output_for(
            self._observations_ph[:, None, :], fixed_actions, reuse=True)

        # Target log-density. Q_soft in Equation 13:
        squash_correction = tf.reduce_sum(
            tf.log(1 - fixed_actions**2 + EPS), axis=-1)
        log_p = svgd_target_values + squash_correction

        grad_log_p = tf.gradients(log_p, fixed_actions)[0]
        grad_log_p = tf.expand_dims(grad_log_p, axis=2)
        grad_log_p = tf.stop_gradient(grad_log_p)
        assert_shape(grad_log_p, [None, n_fixed_actions, 1, self._action_dim])

        kernel_dict = self._kernel_fn(xs=fixed_actions, ys=updated_actions)

        # Kernel function in Equation 13:
        kappa = tf.expand_dims(kernel_dict["output"], dim=3)
        assert_shape(kappa, [None, n_fixed_actions, n_updated_actions, 1])

        # Stein Variational Gradient in Equation 13:
        action_gradients = tf.reduce_mean(
            kappa * grad_log_p + kernel_dict["gradient"], reduction_indices=1)
        assert_shape(action_gradients,
                     [None, n_updated_actions, self._action_dim])

        # Propagate the gradient through the policy network (Equation 14).
        gradients = tf.gradients(
            updated_actions,
            self.policy.get_params_internal(),
            grad_ys=action_gradients)

        surrogate_loss = tf.reduce_sum([
            tf.reduce_sum(w * tf.stop_gradient(g))
            for w, g in zip(self.policy.get_params_internal(), gradients)
        ])

        if self._train_policy:
            optimizer = tf.train.AdamOptimizer(self._policy_lr)
            svgd_training_op = optimizer.minimize(
                loss=-surrogate_loss,
                var_list=self.policy.get_params_internal())
            self._training_ops.append(svgd_training_op)
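tf.split also accepts an explicit list of sizes, which is how the fixed and updated action sets are separated above; a standalone sketch with assumed numbers (16 particles, update ratio 0.5):

import tensorflow as tf

actions = tf.zeros([4, 16, 6])                      # [batch, particles, action_dim] (toy shape)
fixed, updated = tf.split(actions, [8, 8], axis=1)  # [4, 8, 6] and [4, 8, 6]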
Example #49
0
    def _extract_box_classifier_features(self, proposal_feature_maps, scope):
        """Extracts second stage box classifier features.

    This function reconstructs the "second half" of the PNASNet
    network after the part defined in `_extract_proposal_features`.

    Args:
      proposal_feature_maps: A 4-D float tensor with shape
        [batch_size * self.max_num_proposals, crop_height, crop_width, depth]
        representing the feature map cropped to each proposal.
      scope: A scope name.

    Returns:
      proposal_classifier_features: A 4-D float tensor with shape
        [batch_size * self.max_num_proposals, height, width, depth]
        representing box classifier features for each proposal.
    """
        del scope

        # Number of used stem cells.
        num_stem_cells = 2

        # Note that we always feed into 2 layers of equal depth
        # where the first N channels corresponds to previous hidden layer
        # and the second N channels correspond to the final hidden layer.
        hidden_previous, hidden = tf.split(proposal_feature_maps, 2, axis=3)

        # Note that what follows is largely a copy of build_pnasnet_large() within
        # pnasnet.py. We are copying to minimize code pollution in slim.

        # TODO(shlens,skornblith): Determine the appropriate drop path schedule.
        # For now the schedule is the default (1.0->0.7 over 250,000 train steps).
        hparams = pnasnet.large_imagenet_config()
        if not self._is_training:
            hparams.set_hparam('drop_path_keep_prob', 1.0)

        # Calculate the total number of cells in the network
        total_num_cells = hparams.num_cells + num_stem_cells

        normal_cell = pnasnet.PNasNetNormalCell(hparams.num_conv_filters,
                                                hparams.drop_path_keep_prob,
                                                total_num_cells,
                                                hparams.total_training_steps)
        with arg_scope([slim.dropout, nasnet_utils.drop_path],
                       is_training=self._is_training):
            with arg_scope([slim.batch_norm],
                           is_training=self._train_batch_norm):
                with arg_scope([
                        slim.avg_pool2d, slim.max_pool2d, slim.conv2d,
                        slim.batch_norm, slim.separable_conv2d,
                        nasnet_utils.factorized_reduction,
                        nasnet_utils.global_avg_pool,
                        nasnet_utils.get_channel_index,
                        nasnet_utils.get_channel_dim
                ],
                               data_format=hparams.data_format):

                    # This corresponds to the cell number just past 'Cell_7' used by
                    # _extract_proposal_features().
                    start_cell_num = 8
                    true_cell_num = start_cell_num + num_stem_cells

                    with slim.arg_scope(pnasnet.pnasnet_large_arg_scope()):
                        net = _build_pnasnet_base(
                            hidden_previous,
                            hidden,
                            normal_cell=normal_cell,
                            hparams=hparams,
                            true_cell_num=true_cell_num,
                            start_cell_num=start_cell_num)

        proposal_classifier_features = net
        return proposal_classifier_features
Example #50
0
    def build_computation_graph(self):
        """
        notes on notation:

        Symbolic variables have the prefix sy_, to distinguish them from the numerical values
        that are computed later in the function

        prefixes and suffixes:
        ob - observation
        ac - action
        _no - this tensor should have shape (batch self.size /n/, observation dim)
        _na - this tensor should have shape (batch self.size /n/, action dim)
        _n  - this tensor should have shape (batch self.size /n/)

        Note: batch self.size /n/ is defined at runtime, and until then, the shape for that axis
        is None

        ----------------------------------------------------------------------------------
        loss: a function of self.sy_lp_n and self.sy_adv_n that we will differentiate
            to get the policy gradient.
        """
        self.sy_ob_no, self.sy_ac_na, self.sy_adv_n, self.sy_hidden, self.sy_lp_n, self.sy_fixed_lp_n = self.define_placeholders(
        )

        # The policy takes in an observation and produces a distribution over the action space
        policy_outputs = self.policy_forward_pass(self.sy_ob_no,
                                                  self.sy_hidden)
        self.policy_parameters = policy_outputs[:-1]

        # unpack mean and variance
        self.policy_parameters = tf.split(self.policy_parameters[0], 2, axis=1)

        # We can sample actions from this action distribution.
        # This will be called in Agent.sample_trajectory() where we generate a rollout.
        self.sy_sampled_ac = self.sample_action(self.policy_parameters)

        # We can also compute the logprob of the actions that were actually taken by the policy
        # This is used in the loss function.
        self.sy_lp_n = self.get_log_prob(self.policy_parameters, self.sy_ac_na)

        # PPO critic update
        critic_regularizer = tf.contrib.layers.l2_regularizer(
            1e-3) if self.l2reg else None
        self.critic_prediction = tf.squeeze(
            build_critic(self.sy_ob_no,
                         self.sy_hidden,
                         1,
                         'critic_network',
                         n_layers=self.n_layers,
                         size=self.size,
                         gru_size=self.gru_size,
                         recurrent=self.recurrent,
                         regularizer=critic_regularizer))
        self.sy_target_n = tf.placeholder(shape=[None],
                                          name="critic_target",
                                          dtype=tf.float32)
        self.critic_loss = tf.losses.mean_squared_error(
            self.sy_target_n, self.critic_prediction)
        self.critic_weights = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES,
                                                scope='critic_network')
        self.critic_update_op = tf.train.AdamOptimizer(
            self.learning_rate).minimize(self.critic_loss)

        # PPO actor update
        self.sy_fixed_log_prob_n = tf.placeholder(shape=[None],
                                                  name="fixed_log_prob",
                                                  dtype=tf.float32)
        self.policy_surr_loss = self.ppo_loss(self.sy_lp_n, self.sy_fixed_lp_n,
                                              self.sy_adv_n)
        self.policy_weights = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES,
                                                scope=self.scope)
        optimizer = tf.train.AdamOptimizer(self.learning_rate)
        self.policy_update_op = minimize_and_clip(optimizer,
                                                  self.policy_surr_loss,
                                                  var_list=self.policy_weights,
                                                  clip_val=40)
Example #51
0
    def __init__(self, num_words, num_chars, num_classes, num_steps, word_len, embedding_matrix=None):
        # Parameter
        self.config = Config()
        self.dropout_rate = self.config.model_para['dropout_rate']
        self.batch_size = self.config.model_para['batch_size']
        self.num_layers = self.config.model_para['lstm_layer_num']
        self.input_dim = self.config.model_para['input_dim']
        self.hidden_dim = self.config.model_para['hidden_dim']
        self.char_input_dim = self.config.model_para['char_input_dim']
        self.char_hidden_dim = self.config.model_para['char_hidden_dim']
        self.use_pa_learning = self.config.model_para['use_pa_learning']
        
        self.embedding_matrix = embedding_matrix
        
        self.word_len = word_len
        self.num_steps = num_steps
        self.num_words = num_words
        self.num_chars = num_chars
        self.num_classes = num_classes
        
        
        self.char_inputs = tf.placeholder(tf.int32, [None, self.word_len])

        with tf.variable_scope("character-based-emb"):
            # char embedding
            self.char_embedding = tf.get_variable("char_emb", [self.num_chars, self.char_input_dim])

            self.char_inputs_emb = tf.nn.embedding_lookup(self.char_embedding, self.char_inputs)
            self.char_inputs_emb = tf.transpose(self.char_inputs_emb, [1, 0, 2])
            self.char_inputs_emb = tf.reshape(self.char_inputs_emb, [-1, self.char_input_dim])
            self.char_inputs_emb = tf.split(self.char_inputs_emb, self.word_len, 0)
            
        # char forward and backward
        with tf.variable_scope("char-bi-lstm"):
            # char lstm cell
            char_lstm_cell_fw = rnn.LSTMCell(self.char_hidden_dim)
            char_lstm_cell_bw = rnn.LSTMCell(self.char_hidden_dim)

            # get the length of each word
            self.word_length = tf.reduce_sum(tf.sign(self.char_inputs), reduction_indices=1)
            self.word_length = tf.cast(self.word_length, tf.int32)

            char_outputs, f_output, r_output = tf.contrib.rnn.static_bidirectional_rnn(
                char_lstm_cell_fw, 
                char_lstm_cell_bw,
                self.char_inputs_emb, 
                dtype=tf.float32,
                sequence_length=self.word_length
            )
        final_word_output = tf.concat([f_output.h, r_output.h], -1)

        self.word_lstm_last_output = tf.reshape(final_word_output, [-1, self.num_steps, self.char_hidden_dim*2])
        
        # '''
        #     word input
        # '''
        with tf.variable_scope("word-based-emb"):
            self.inputs = tf.placeholder(tf.int32, [None, self.num_steps])
            # self.targets = tf.placeholder(tf.int32, [None, self.num_steps])
            if self.use_pa_learning:
                self.targets = tf.placeholder(tf.float32, [None, self.num_steps+2, self.num_classes+1])
            else:
                self.targets = tf.placeholder(tf.int32, [None, self.num_steps])
            self.targets_transition = tf.placeholder(tf.int32, [None])
            self.keep_prob = tf.placeholder(tf.float32)

            if embedding_matrix is not None:
                self.embedding = tf.Variable(embedding_matrix, trainable=True, name="word_emb", dtype=tf.float32)
            else:
                self.embedding = tf.get_variable("word_emb", [self.num_words, self.input_dim])

            self.inputs_emb = tf.nn.embedding_lookup(self.embedding, self.inputs)
            self.inputs_emb = tf.concat([self.inputs_emb, self.word_lstm_last_output], -1)

            self.inputs_emb = tf.nn.dropout(self.inputs_emb, self.keep_prob)
            self.inputs_emb = tf.transpose(self.inputs_emb, [1, 0, 2])
            self.inputs_emb = tf.reshape(self.inputs_emb, [-1, self.input_dim+self.char_hidden_dim*2])
            self.inputs_emb = tf.split(self.inputs_emb, self.num_steps, 0)

            # word lstm cell
            lstm_cell_fw = rnn.LSTMCell(self.hidden_dim)
            lstm_cell_bw = rnn.LSTMCell(self.hidden_dim)

            # get the length of each sample
            self.length = tf.reduce_sum(tf.sign(self.inputs), reduction_indices=1)
            self.length = tf.cast(self.length, tf.int32) 
        
        # forward and backward
        with tf.variable_scope("word-bi-lstm"):
            self.outputs, _, _ = tf.contrib.rnn.static_bidirectional_rnn(
                lstm_cell_fw, 
                lstm_cell_bw,
                self.inputs_emb, 
                dtype=tf.float32,
                sequence_length=self.length
            )

        # bidirect concat
        final_outputs = tf.reshape(tf.concat(self.outputs, 1), [-1, self.hidden_dim * 2])
        tanh_layer_w = tf.get_variable("tanh_layer_w", [self.hidden_dim * 2, self.hidden_dim])
        tanh_layer_b = tf.get_variable("tanh_layer_b", [self.hidden_dim])
        self.final_outputs = tf.tanh(tf.matmul(final_outputs, tanh_layer_w) + tanh_layer_b)
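The transpose/reshape/split sequence used twice above converts a [batch, steps, dim] tensor into the list of per-timestep [batch, dim] tensors expected by the static bidirectional RNN; a minimal sketch with toy shapes:

import tensorflow as tf

x = tf.zeros([8, 10, 32])       # [batch, num_steps, dim] (toy values)
x = tf.transpose(x, [1, 0, 2])  # [num_steps, batch, dim]
x = tf.reshape(x, [-1, 32])     # [num_steps * batch, dim]
steps = tf.split(x, 10, 0)      # list of 10 tensors, each [8, 32]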

        
 
Example #52
0
    def run(self,
            *in_arrays: Tuple[Union[np.ndarray, None], ...],
            input_transform: dict = None,
            output_transform: dict = None,
            return_as_list: bool = False,
            print_progress: bool = False,
            minibatch_size: int = None,
            num_gpus: int = 1,
            assume_frozen: bool = False,
            **dynamic_kwargs) -> Union[np.ndarray, Tuple[np.ndarray, ...], List[np.ndarray]]:
        """Run this network for the given NumPy array(s), and return the output(s) as NumPy array(s).

        Args:
            input_transform:    A dict specifying a custom transformation to be applied to the input tensor(s) before evaluating the network.
                                The dict must contain a 'func' field that points to a top-level function. The function is called with the input
                                TensorFlow expression(s) as positional arguments. Any remaining fields of the dict will be passed in as kwargs.
            output_transform:   A dict specifying a custom transformation to be applied to the output tensor(s) after evaluating the network.
                                The dict must contain a 'func' field that points to a top-level function. The function is called with the output
                                TensorFlow expression(s) as positional arguments. Any remaining fields of the dict will be passed in as kwargs.
            return_as_list:     True = return a list of NumPy arrays, False = return a single NumPy array, or a tuple if there are multiple outputs.
            print_progress:     Print progress to the console? Useful for very large input arrays.
            minibatch_size:     Maximum minibatch size to use, None = disable batching.
            num_gpus:           Number of GPUs to use.
            assume_frozen:      Improve multi-GPU performance by assuming that the trainable parameters will remain unchanged between calls.
            dynamic_kwargs:     Additional keyword arguments to be passed into the network build function.
        """
        assert len(in_arrays) == self.num_inputs
        assert not all(arr is None for arr in in_arrays)
        assert input_transform is None or util.is_top_level_function(input_transform["func"])
        assert output_transform is None or util.is_top_level_function(output_transform["func"])
        output_transform, dynamic_kwargs = _handle_legacy_output_transforms(output_transform, dynamic_kwargs)
        num_items = in_arrays[0].shape[0]
        if minibatch_size is None:
            minibatch_size = num_items

        # Construct unique hash key from all arguments that affect the TensorFlow graph.
        key = dict(input_transform=input_transform, output_transform=output_transform, num_gpus=num_gpus, assume_frozen=assume_frozen, dynamic_kwargs=dynamic_kwargs)
        def unwind_key(obj):
            if isinstance(obj, dict):
                return [(key, unwind_key(value)) for key, value in sorted(obj.items())]
            if callable(obj):
                return util.get_top_level_function_name(obj)
            return obj
        key = repr(unwind_key(key))

        # Build graph.
        if key not in self._run_cache:
            with tfutil.absolute_name_scope(self.scope + "/_Run"), tf.control_dependencies(None):
                with tf.device("/cpu:0"):
                    in_expr = [tf.placeholder(tf.float32, name=name) for name in self.input_names]
                    in_split = list(zip(*[tf.split(x, num_gpus) for x in in_expr]))

                out_split = []
                for gpu in range(num_gpus):
                    with tf.device("/gpu:%d" % gpu):
                        net_gpu = self.clone() if assume_frozen else self
                        in_gpu = in_split[gpu]

                        if input_transform is not None:
                            in_kwargs = dict(input_transform)
                            in_gpu = in_kwargs.pop("func")(*in_gpu, **in_kwargs)
                            in_gpu = [in_gpu] if tfutil.is_tf_expression(in_gpu) else list(in_gpu)

                        # assert len(in_gpu) == self.num_inputs
                        out_gpu = net_gpu.get_output_for(*in_gpu, return_as_list=True, **dynamic_kwargs)

                        if output_transform is not None:
                            out_kwargs = dict(output_transform)
                            out_gpu = out_kwargs.pop("func")(*out_gpu, **out_kwargs)
                            out_gpu = [out_gpu] if tfutil.is_tf_expression(out_gpu) else list(out_gpu)

                        # assert len(out_gpu) == self.num_outputs
                        out_split.append(out_gpu)

                with tf.device("/cpu:0"):
                    out_expr = [tf.concat(outputs, axis=0) for outputs in zip(*out_split)]
                    self._run_cache[key] = in_expr, out_expr

        # Run minibatches.
        in_expr, out_expr = self._run_cache[key]
        out_arrays = [np.empty([num_items] + expr.shape.as_list()[1:], expr.dtype.name) for expr in out_expr]

        for mb_begin in range(0, num_items, minibatch_size):
            if print_progress:
                print("\r%d / %d" % (mb_begin, num_items), end="")

            mb_end = min(mb_begin + minibatch_size, num_items)
            mb_num = mb_end - mb_begin
            mb_in = [src[mb_begin : mb_end] if src is not None else np.zeros([mb_num] + shape[1:]) for src, shape in zip(in_arrays, self.input_shapes)]
            mb_out = tf.get_default_session().run(out_expr, dict(zip(in_expr, mb_in)))

            for dst, src in zip(out_arrays, mb_out):
                dst[mb_begin: mb_end] = src

        # Done.
        if print_progress:
            print("\r%d / %d" % (num_items, num_items))

        if not return_as_list:
            out_arrays = out_arrays[0] if len(out_arrays) == 1 else tuple(out_arrays)
        return out_arrays
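# --- Illustrative sketch, not part of the original snippet ---
# A minimal, self-contained example (assuming TensorFlow 1.x) of the multi-GPU
# pattern used in run() above: tf.split shards the input batch, each shard goes
# through the same computation (a toy reduce_sum stands in for the per-GPU network),
# and tf.concat reassembles the per-shard outputs along the batch axis.
import numpy as np
import tensorflow as tf

num_gpus = 2
x = tf.placeholder(tf.float32, [8, 4], name="x")
shards = tf.split(x, num_gpus)                      # 2 tensors of shape (4, 4)
outputs = [tf.reduce_sum(s, axis=1, keepdims=True)  # per-shard computation
           for s in shards]
y = tf.concat(outputs, axis=0)                      # reassembled: shape (8, 1)

with tf.Session() as sess:
    print(sess.run(y, {x: np.ones([8, 4], np.float32)}))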
Example #53
0
    def __init__(self, vgg16_npy_path=None, restore_from=None):
        # pre-trained parameters
        try:
            self.data_dict = np.load(vgg16_npy_path, encoding='latin1').item()
        except FileNotFoundError:
            print(
                'Please download the VGG16 parameters from https://mega.nz/#!YU1FWJrA!O1ywiCS2IiOlUCtCpI6HTJOMrneN-Qdv3ywQP5poecM'
            )

        self.tfx = tf.placeholder(tf.float32, [None, 224, 224, 3])
        self.tfy = tf.placeholder(tf.float32, [None, 1])

        # Convert RGB to BGR
        red, green, blue = tf.split(axis=3,
                                    num_or_size_splits=3,
                                    value=self.tfx * 255.0)
        bgr = tf.concat(axis=3,
                        values=[
                            blue - self.vgg_mean[0],
                            green - self.vgg_mean[1],
                            red - self.vgg_mean[2],
                        ])

        # pre-trained VGG layers are fixed during fine-tuning
        conv1_1 = self.conv_layer(bgr, "conv1_1")
        conv1_2 = self.conv_layer(conv1_1, "conv1_2")
        pool1 = self.max_pool(conv1_2, 'pool1')

        conv2_1 = self.conv_layer(pool1, "conv2_1")
        conv2_2 = self.conv_layer(conv2_1, "conv2_2")
        pool2 = self.max_pool(conv2_2, 'pool2')

        conv3_1 = self.conv_layer(pool2, "conv3_1")
        conv3_2 = self.conv_layer(conv3_1, "conv3_2")
        conv3_3 = self.conv_layer(conv3_2, "conv3_3")
        pool3 = self.max_pool(conv3_3, 'pool3')

        conv4_1 = self.conv_layer(pool3, "conv4_1")
        conv4_2 = self.conv_layer(conv4_1, "conv4_2")
        conv4_3 = self.conv_layer(conv4_2, "conv4_3")
        pool4 = self.max_pool(conv4_3, 'pool4')

        conv5_1 = self.conv_layer(pool4, "conv5_1")
        conv5_2 = self.conv_layer(conv5_1, "conv5_2")
        conv5_3 = self.conv_layer(conv5_2, "conv5_3")
        pool5 = self.max_pool(conv5_3, 'pool5')

        # detach the original VGG fc layers and
        # reconstruct your own fc layers to serve your own purpose
        self.flatten = tf.reshape(pool5, [-1, 7 * 7 * 512])
        self.fc6 = tf.layers.dense(self.flatten, 256, tf.nn.relu, name='fc6')
        self.out = tf.layers.dense(self.fc6, 1, name='out')

        self.sess = tf.Session()
        if restore_from:
            saver = tf.train.Saver()
            saver.restore(self.sess, restore_from)
        else:  # training graph
            self.loss = tf.losses.mean_squared_error(labels=self.tfy,
                                                     predictions=self.out)
            self.train_op = tf.train.RMSPropOptimizer(0.001).minimize(
                self.loss)
            self.sess.run(tf.global_variables_initializer())
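# --- Illustrative sketch, not part of the original snippet ---
# The RGB -> BGR step above in isolation (assuming TensorFlow 1.x): tf.split along
# the channel axis yields one tensor per channel; the channels are re-ordered and
# mean-subtracted, then tf.concat stitches them back together. The mean values are
# the standard VGG ImageNet means (assumed here; the class defines them elsewhere).
import tensorflow as tf

vgg_mean = [103.939, 116.779, 123.68]  # B, G, R means
rgb = tf.placeholder(tf.float32, [None, 224, 224, 3])
red, green, blue = tf.split(rgb * 255.0, num_or_size_splits=3, axis=3)
bgr = tf.concat([blue - vgg_mean[0],
                 green - vgg_mean[1],
                 red - vgg_mean[2]], axis=3)        # (batch, 224, 224, 3), BGR order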
Example #54
0
  def extract_features(self, preprocessed_inputs, state_saver=None,
                       state_name='lstm_state', unroll_length=10, scope=None):
    """Extract features from preprocessed inputs.

    The features include the base network features, lstm features and SSD
    features, organized in the following name scope:

    <scope>/MobilenetV2_1/...
    <scope>/MobilenetV2_2/...
    <scope>/LSTM/...
    <scope>/FeatureMap/...

    Args:
      preprocessed_inputs: a [batch, height, width, channels] float tensor
        representing a batch of consecutive frames from video clips.
      state_saver: A state saver object with methods `state` and `save_state`.
      state_name: Python string, the name to use with the state_saver.
      unroll_length: number of steps to unroll the lstm.
      scope: Scope for the base network of the feature extractor.

    Returns:
      feature_maps: a list of tensors where the ith tensor has shape
        [batch, height_i, width_i, depth_i]
    Raises:
      ValueError: if interleave_method not recognized or large and small base
        network output feature maps of different sizes.
    """
    preprocessed_inputs = shape_utils.check_min_image_dim(
        33, preprocessed_inputs)
    preprocessed_inputs = ops.pad_to_multiple(
        preprocessed_inputs, self._pad_to_multiple)
    batch_size = preprocessed_inputs.shape[0].value / unroll_length
    batch_axis = 0
    nets = []

    # Batch processing of mobilenet features.
    with slim.arg_scope(mobilenet_v2.training_scope(
        is_training=self._is_training,
        bn_decay=0.9997)), \
        slim.arg_scope([mobilenet.depth_multiplier],
                       min_depth=self._min_depth, divisible_by=8):
      # Big model.
      net, _ = self.extract_base_features_large(preprocessed_inputs)
      nets.append(net)
      large_base_feature_shape = net.shape

      # Small models
      net, _ = self.extract_base_features_small(preprocessed_inputs)
      nets.append(net)
      small_base_feature_shape = net.shape
      if not (large_base_feature_shape[1] == small_base_feature_shape[1] and
              large_base_feature_shape[2] == small_base_feature_shape[2]):
        raise ValueError('Large and Small base network feature map dimension '
                         'not equal!')

    with slim.arg_scope(self._conv_hyperparams_fn()):
      with tf.variable_scope('LSTM', reuse=self._reuse_weights) as lstm_scope:
        output_size = (large_base_feature_shape[1], large_base_feature_shape[2])
        lstm_cell, init_state, step = self.create_lstm_cell(
            batch_size, output_size, state_saver, state_name)

        nets_seq = [
            tf.split(net, unroll_length, axis=batch_axis) for net in nets
        ]

        net_seq, states_out = rnn_decoder.multi_input_rnn_decoder(
            nets_seq,
            init_state,
            lstm_cell,
            step,
            selection_strategy=self._interleave_method,
            is_training=self._is_training,
            pre_bottleneck=self._pre_bottleneck,
            flatten_state=self._flatten_state,
            scope=lstm_scope)
        self._states_out = states_out

      batcher_ops = None
      if state_saver is not None:
        self._step = state_saver.state(state_name + '_step')
        batcher_ops = [
            state_saver.save_state(state_name + '_c', states_out[-1][0]),
            state_saver.save_state(state_name + '_h', states_out[-1][1]),
            state_saver.save_state(state_name + '_step', self._step + 1)]
      image_features = {}
      with tf_ops.control_dependencies(batcher_ops):
        image_features['layer_19'] = tf.concat(net_seq, 0)

      # SSD layers.
      with tf.variable_scope('FeatureMap'):
        feature_maps = feature_map_generators.multi_resolution_feature_maps(
            feature_map_layout=self._feature_map_layout,
            depth_multiplier=self._depth_multiplier,
            min_depth=self._min_depth,
            insert_1x1_conv=True,
            image_features=image_features,
            pool_residual=True)
    return feature_maps.values()
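# --- Illustrative sketch, not part of the original snippet ---
# The unrolling step above in isolation (assuming TensorFlow 1.x, hypothetical
# feature-map shape): the batch axis of the base feature map packs `unroll_length`
# consecutive frames, and tf.split cuts it back into one tensor per LSTM step.
import tensorflow as tf

unroll_length, batch_size = 10, 2
features = tf.placeholder(tf.float32, [batch_size * unroll_length, 19, 19, 96])
per_step = tf.split(features, unroll_length, axis=0)  # 10 tensors of shape (2, 19, 19, 96)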
Example #55
0
File: pfnl.py Project: saiku-20/History
    def forward(self, x):
        def _NonLocalBlock(input_x,
                           out_channels,
                           sub_sample=1,
                           nltype=0,
                           is_bn=False,
                           scope='NonLocalBlock'):
            """
            https://github.com/nnUyi/Non-Local_Nets-Tensorflow
            """
            batchsize, height, width, in_channels = input_x.get_shape(
            ).as_list()
            typedict = {
                0: 'embedded_gaussian',
                1: 'gaussian',
                2: 'dot_product',
                3: 'concat'
            }
            with tf.variable_scope(scope) as sc:
                if nltype <= 2:
                    with tf.variable_scope('g') as scope:
                        g = conv2d(input_x,
                                   out_channels,
                                   1,
                                   strides=1,
                                   padding='same',
                                   name='g')
                        if sub_sample > 1:
                            g = average_pooling2d(g,
                                                  pool_size=sub_sample,
                                                  strides=sub_sample,
                                                  name='g_pool')

                    with tf.variable_scope('phi') as scope:
                        if nltype == 0 or nltype == 2:
                            phi = conv2d(input_x,
                                         out_channels,
                                         1,
                                         strides=1,
                                         padding='same',
                                         name='phi')
                        elif nltype == 1:
                            phi = input_x
                        if sub_sample > 1:
                            phi = average_pooling2d(phi,
                                                    pool_size=sub_sample,
                                                    strides=sub_sample,
                                                    name='phi_pool')

                    with tf.variable_scope('theta') as scope:
                        if nltype == 0 or nltype == 2:
                            theta = conv2d(input_x,
                                           out_channels,
                                           1,
                                           strides=1,
                                           padding='same',
                                           name='theta')
                        elif nltype == 1:
                            theta = input_x

                    g_x = tf.reshape(g, [batchsize, -1, out_channels])
                    theta_x = tf.reshape(theta, [batchsize, -1, out_channels])

                    # theta_x = tf.reshape(theta, [batchsize, out_channels, -1])
                    # theta_x = tf.transpose(theta_x, [0,2,1])
                    phi_x = tf.reshape(phi, [batchsize, -1, out_channels])
                    phi_x = tf.transpose(phi_x, [0, 2, 1])
                    #phi_x = tf.reshape(phi_x, [batchsize, out_channels, -1])

                    f = tf.matmul(theta_x, phi_x)
                    # ???
                    if nltype <= 1:
                        # f_softmax = tf.nn.softmax(f, -1)
                        f = tf.exp(f)
                        f_softmax = f / tf.reduce_sum(
                            f, axis=-1, keepdims=True)
                    elif nltype == 2:
                        f = tf.nn.relu(f)  #/int(f.shape[-1])
                        f_mean = tf.reduce_sum(f, axis=[2], keepdims=True)
                        #print(f.shape,f_mean.shape)
                        f_softmax = f / f_mean
                    y = tf.matmul(f_softmax, g_x)
                    y = tf.reshape(y, [batchsize, height, width, out_channels])
                    with tf.variable_scope('w') as scope:
                        w_y = conv2d(y,
                                     in_channels,
                                     1,
                                     strides=1,
                                     padding='same',
                                     name='w')
                        # if is_bn:
                        #     w_y = slim.batch_norm(w_y)
                    z = w_y  #input_x + w_y
                    return z

        """
        hyper-paras
        """
        mf = 64  # output feature map num for most convs
        dk = 3  # kernel size for most convs
        ds = 1  # stride for most convs
        activate = tf.nn.leaky_relu
        num_block = self.num_block  # progressive fusion block num
        ki = tf.contrib.layers.xavier_initializer()

        n, nf, w, h, c = x.shape
        # print(n, nf, w, h, c)

        with tf.variable_scope('network', reuse=tf.AUTO_REUSE) as scope:
            conv0 = Conv2D(mf,
                           5,
                           strides=ds,
                           padding='same',
                           activation=activate,
                           kernel_initializer=ki,
                           name='conv0')
            conv1 = [
                Conv2D(mf,
                       dk,
                       strides=ds,
                       padding='same',
                       activation=activate,
                       kernel_initializer=ki,
                       name='conv1_{}'.format(i)) for i in range(num_block)
            ]
            conv10 = [
                Conv2D(mf,
                       1,
                       strides=ds,
                       padding='same',
                       activation=activate,
                       kernel_initializer=ki,
                       name='conv10_{}'.format(i)) for i in range(num_block)
            ]
            conv2 = [
                Conv2D(mf,
                       dk,
                       strides=ds,
                       padding='same',
                       activation=activate,
                       kernel_initializer=ki,
                       name='conv2_{}'.format(i)) for i in range(num_block)
            ]
            convout4 = Conv2D(256,
                              3,
                              strides=ds,
                              padding='same',
                              activation=activate,
                              kernel_initializer=ki,
                              name='convout4')
            convout3 = Conv2D(128,
                              3,
                              strides=ds,
                              padding='same',
                              activation=activate,
                              kernel_initializer=ki,
                              name='convout3')
            convout2 = Conv2D(64,
                              3,
                              strides=ds,
                              padding='same',
                              activation=activate,
                              kernel_initializer=ki,
                              name='convout2')
            convout1 = Conv2D(3,
                              3,
                              strides=ds,
                              padding='same',
                              activation=None,
                              kernel_initializer=ki,
                              name='convout1')
            """
            center for residual add
            """
            center_tensor = x[:,
                              self.num_frames // 2, :, :, :]  # (bs, h, w, c)
            # print(center_tensor.get_shape())
            """
            nonlocal + res
            """
            inp0 = [x[:, i, :, :, :]
                    for i in range(nf)]  # [nf * (bs, h, w, c)]
            inp0 = tf.concat(inp0, axis=-1)  # (bs, h, w, c*nf)
            # print(inp0.get_shape())

            if self.refactor > 1:
                inp1 = tf.space_to_depth(
                    inp0, self.refactor
                )  # space2depth: (h, w, c) -> (h//2, w//2, c*4)
            else:
                inp1 = inp0
            inp1 = _NonLocalBlock(inp1,
                                  int(c) * self.num_frames * self.refactor *
                                  self.refactor,
                                  sub_sample=1,
                                  nltype=1,
                                  scope='nlblock_{}'.format(0))
            if self.refactor > 1:
                inp1 = tf.depth_to_space(
                    inp1, self.refactor
                )  # depth2space: (h//2, w//2, c*4) -> (h, w, c)

            inp0 += inp1  # (bs, h, w, c*nf)
            """
            5x5 conv
            """
            inp0 = tf.split(inp0, num_or_size_splits=self.num_frames,
                            axis=-1)  # [nf * (bs, h, w, c)]
            # print(len(inp0))
            # print(inp0[0].get_shape())
            inp0 = [conv0(f) for f in inp0]  # [nf * (bs, h, w, 64)]
            # print(inp0[0].get_shape())
            """
            progressive fusion blocks
            """
            for i in range(num_block):
                inp1 = [conv1[i](f) for f in inp0]  # [nf * (bs, h, w, 64)]
                base = tf.concat(inp1, axis=-1)  # (bs, h, w, 64*nf)
                base = conv10[i](base)
                inp2 = [tf.concat([base, f], -1) for f in inp1]
                inp2 = [conv2[i](f) for f in inp2]
                inp0 = [tf.add(inp0[j], inp2[j])
                        for j in range(nf)]  # [nf * (bs, h, w, 64)]
            """
            merge
            """
            merge = tf.concat(inp0, axis=-1)  # (bs, h, w, 64*nf)
            out = convout4(merge)
            out = convout3(out)
            out = convout2(out)
            out = convout1(out)  # (bs, h, w, c)
        """
        residual
        """
        return tf.stack([out + center_tensor], axis=1,
                        name='out')  # (bs, h, w, c) -> (bs, 1, h, w, c)
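# --- Illustrative sketch, not part of the original snippet ---
# The concat/split round trip used above (assuming TensorFlow 1.x, toy shapes):
# per-frame tensors are fused along the channel axis for the non-local block, and
# tf.split later recovers one (bs, h, w, c) slice per frame for the 5x5 convs.
import tensorflow as tf

num_frames, c = 7, 3
clip = tf.placeholder(tf.float32, [None, num_frames, 32, 32, c])
stacked = tf.concat([clip[:, i] for i in range(num_frames)], axis=-1)  # (bs, 32, 32, c*nf)
per_frame = tf.split(stacked, num_or_size_splits=num_frames, axis=-1)  # nf * (bs, 32, 32, c)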
Example #56
0
    def _build_graph(self, inputs):



        # ========================== Convert Color Space ==========================
        lr_rgb, hr_rgb = inputs
        lr_y, hr_y = rgb2y(lr_rgb), rgb2y(hr_rgb)
        lr_ycbcr, hr_ycbcr = rgb2ycbcr(lr_rgb), rgb2ycbcr(hr_rgb)
        # (b, t, h, w, c) to (b, h, w, c) * t
        lr_y = tf.split(lr_y, cfg.frames, axis = 1)
        lr_y = [tf.reshape(i, (-1, h, w, 1)) for i in lr_y]
        lr_rgb = tf.split(lr_rgb, cfg.frames, axis = 1)
        lr_rgb = [tf.reshape(i, (-1, h, w, 3)) for i in lr_rgb]
        lr_ycbcr = tf.split(lr_ycbcr, cfg.frames, axis = 1)
        lr_ycbcr = [tf.reshape(i, (-1, h, w, 3)) for i in lr_ycbcr]

        # ========================== split ==========================


        # ========================== Normalization ==========================
        lr_y = [i / 255.0 - 0.5 for i in lr_y]
        lr_rgb = [i / 255.0 - 0.5 for i in lr_rgb]
        lr_ycbcr = [i / 255.0 - 0.5 for i in lr_ycbcr]
        hr_y = hr_y / 255.0 - 0.5
        referenced_rgb = lr_rgb[cfg.frames // 2]
        referenced_y = lr_y[cfg.frames // 2]
        ref_ycbcr = lr_ycbcr[cfg.frames // 2]



        # ========================== Forward ==========================
        hr_sparses = []
        flows = []
        warped = []
        coords = get_coords(h, w)
        with tf.variable_scope("ME_SPMC") as scope:
            for i, j in zip(lr_y, lr_rgb):
                flow_i0 = motion_estimation(referenced_y, i) * h / 2
                # freeze in stage 2
                if self.stage == 2:
                    flow_i0 = tf.stop_gradient(flow_i0)         
                flows.append(flow_i0)
                hr_sparses.append(spmc_layer(i, flow_i0))
                mapping = coords - flow_i0
                backward_warped_img = BackwardWarping('backward_warpped', [referenced_y, mapping], borderMode='constant')
                warped.append(backward_warped_img)
                scope.reuse_variables()
        hr_denses = detail_fusion_net(hr_sparses, ref_ycbcr[:, :, :, :1])

        # ========================== Outputs ==========================
        flow_after_reshape = [tf.reshape(i, (-1, 1, h, w, 2)) for i in flows]
        tf_flows = tf.concat(flow_after_reshape, axis = 1, name = 'flows')
        warped_after_reshape = [tf.reshape(i, (-1, 1, h, w, 1)) for i in warped]
        after_warp = tf.concat(warped_after_reshape, axis = 1, name = 'after_warp')

        padh = int(math.ceil(h / 4.0) * 4.0 - h)
        padw = int(math.ceil(w / 4.0) * 4.0 - w)

        scale_factor = 2

        # Un-normalization
        output_y = (hr_denses[-1] + 0.5) * 255.
        # Un-normalization, then bicubic interpolation
        output_cbcr = tf.image.resize_images(
                        (ref_ycbcr + 0.5) * 255.0,
                        [(h + padh) * scale_factor, (w + padw) * scale_factor],
                        method = 2)[:, :, :, 1:3]
        # Y: model output; Cb & Cr: bicubic interpolation
        prediction = tf.concat([output_y, output_cbcr], axis = -1)
        # convert YCbCr to RGB
        prediction = tf.identity(ycbcr2rgb(prediction), name = 'predictions')

        # ========================== Cost Functions ==========================
        k = np.arange(*k_range, 0.5 / cfg.frames)
        mask_warped = []
        warp_loss = []
        mask_warp_loss = []
        flow_loss = []
        euclidean_loss = []
        for i in range(cfg.frames):
            mapping = coords - flows[i]
            mask1 = tf.greater_equal(mapping[:,:,:,:1], 0.0)
            mask2 = tf.less_equal(mapping[:,:,:,:1], h-1)
            mask3 = tf.greater_equal(mapping[:,:,:,1:], 0.0)
            mask4 = tf.less_equal(mapping[:,:,:,1:], w-1)
            mask12 = tf.logical_and(mask1, mask2)
            mask34 = tf.logical_and(mask3, mask4)
            mask = tf.cast(tf.logical_and(mask12, mask34), tf.float32)

            mask_warped.append(self._unorm(warped[i], mask))
            mask_warp_loss.append(tf.reduce_sum(mask * tf.abs(lr_y[i] - warped[i])) / tf.reduce_sum(mask) * tf.reduce_sum(tf.ones_like(mask)))
            warp_loss.append(tf.reduce_sum(tf.abs(lr_y[i] - warped[i])))
            flow_loss.append(tf.reduce_sum(tf.abs(tf.image.total_variation(flows[i]))))
            euclidean_loss.append(tf.reduce_sum(tf.square(hr_y - hr_denses[i])))

        loss_me_1 = tf.reduce_sum([mask_warp_loss[i] for i in range(cfg.frames)])
        loss_me_2 = tf.reduce_sum([flow_loss[i] for i in range(cfg.frames)])
        loss_me = loss_me_1 + cfg.lambda1 * loss_me_2
        loss_sr = tf.reduce_sum([k[i] * euclidean_loss[i] for i in range(cfg.frames)])
        
        costs = [
            loss_me,
            loss_sr,
            loss_sr + cfg.lambda2 * loss_me
        ]
        self.cost = tf.identity(costs[self.stage - 1], name = 'cost')

        # ========================================== Summary ==========================================
        tf.summary.image('input', referenced_rgb, max_outputs = 3)
        tf.summary.image('groundtruth', hr_rgb, max_outputs=3)
        tf.summary.image('frame_pair_1', tf.concat([self._unorm(referenced_y), self._unorm(lr_y[0]), mask_warped[0]], axis=1), max_outputs=3)
        tf.summary.image('frame_pair_2', tf.concat([self._unorm(referenced_y), self._unorm(lr_y[1]), mask_warped[1]], axis=1), max_outputs=3)
        tf.summary.image('flow', flow_to_color(flows[0]), max_outputs=3)
        # tf.summary.image('flow_1', tf.concat([flows[0][:,:,:,:1], flows[0][:,:,:,1:]], axis=1), max_outputs=3)
        # tf.summary.image('flow_2', tf.concat([flows[1][:,:,:,:1], flows[1][:,:,:,1:]], axis=1), max_outputs=3)
        # tf.summary.image('reference_frame', referenced, max_outputs=3)
        tf.summary.image('output', prediction, max_outputs=3)
        add_moving_summary(
            # tf.identity(loss_me_1, name = 'warp_loss'),
            # tf.identity(loss_me_2, name = 'flow_loss'),
            tf.identity(loss_me, name = 'loss_me'),
            tf.identity(loss_sr, name = 'loss_sr'),
            self.cost
        )
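# --- Illustrative sketch, not part of the original snippet ---
# The (b, t, h, w, c) -> t * (b, h, w, c) conversion used at the top of _build_graph
# (assuming TensorFlow 1.x, toy shapes): tf.split cuts the time axis and reshape
# drops the resulting singleton dimension.
import tensorflow as tf

frames, h, w = 3, 64, 64
clip_y = tf.placeholder(tf.float32, [None, frames, h, w, 1])
per_t = tf.split(clip_y, frames, axis=1)                 # 3 tensors of shape (b, 1, h, w, 1)
per_t = [tf.reshape(t, (-1, h, w, 1)) for t in per_t]    # 3 tensors of shape (b, h, w, 1)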
Example #57
0
def _split_tensor(values, num_splits, axis):
    if tf.__version__ == '0.12.0':
        return tf.split(axis, num_splits, values)
    else:
        return tf.split(values, num_splits, axis=axis)
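# --- Illustrative usage, not part of the original snippet ---
# _split_tensor hides the argument-order change of tf.split introduced after TF 0.12;
# both branches return the same list of sub-tensors. Toy usage (TensorFlow 1.x assumed):
import tensorflow as tf

_batch = tf.placeholder(tf.float32, [8, 16])
_parts = _split_tensor(_batch, num_splits=4, axis=0)  # 4 tensors of shape (2, 16)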
    def run(
        self,
        *in_arrays,
        return_as_list=False,  # True = return a list of NumPy arrays, False = return a single NumPy array, or a tuple if there are multiple outputs.
        print_progress=False,  # Print progress to the console? Useful for very large input arrays.
        minibatch_size=None,  # Maximum minibatch size to use, None = disable batching.
        num_gpus=1,  # Number of GPUs to use.
        out_mul=1.0,  # Multiplicative constant to apply to the output(s).
        out_add=0.0,  # Additive constant to apply to the output(s).
        out_shrink=1,  # Shrink the spatial dimensions of the output(s) by the given factor.
        out_dtype=None,  # Convert the output to the specified data type.
        **dynamic_kwargs
    ):  # Additional keyword arguments to pass into the network construction function.

        assert len(in_arrays) == self.num_inputs
        num_items = in_arrays[0].shape[0]
        if minibatch_size is None:
            minibatch_size = num_items
        key = str([
            list(sorted(dynamic_kwargs.items())), num_gpus, out_mul, out_add,
            out_shrink, out_dtype
        ])

        # Build graph.
        if key not in self._run_cache:
            with absolute_name_scope(self.scope +
                                     '/Run'), tf.control_dependencies(None):
                in_split = list(
                    zip(*[tf.split(x, num_gpus)
                          for x in self.input_templates]))
                out_split = []
                for gpu in range(num_gpus):
                    with tf.device('/gpu:%d' % gpu):
                        out_expr = self.get_output_for(*in_split[gpu],
                                                       return_as_list=True,
                                                       **dynamic_kwargs)
                        if out_mul != 1.0:
                            out_expr = [x * out_mul for x in out_expr]
                        if out_add != 0.0:
                            out_expr = [x + out_add for x in out_expr]
                        if out_shrink > 1:
                            ksize = [1, 1, out_shrink, out_shrink]
                            out_expr = [
                                tf.nn.avg_pool(x,
                                               ksize=ksize,
                                               strides=ksize,
                                               padding='VALID',
                                               data_format='NCHW')
                                for x in out_expr
                            ]
                        if out_dtype is not None:
                            if tf.as_dtype(out_dtype).is_integer:
                                out_expr = [tf.round(x) for x in out_expr]
                            out_expr = [
                                tf.saturate_cast(x, out_dtype)
                                for x in out_expr
                            ]
                        out_split.append(out_expr)
                self._run_cache[key] = [
                    tf.concat(outputs, axis=0) for outputs in zip(*out_split)
                ]

        # Run minibatches.
        out_expr = self._run_cache[key]
        out_arrays = [
            np.empty([num_items] + shape_to_list(expr.shape)[1:],
                     expr.dtype.name) for expr in out_expr
        ]
        for mb_begin in range(0, num_items, minibatch_size):
            if print_progress:
                print('\r%d / %d' % (mb_begin, num_items), end='')
            mb_end = min(mb_begin + minibatch_size, num_items)
            mb_in = [src[mb_begin:mb_end] for src in in_arrays]
            mb_out = tf.get_default_session().run(
                out_expr, dict(zip(self.input_templates, mb_in)))
            for dst, src in zip(out_arrays, mb_out):
                dst[mb_begin:mb_end] = src

        # Done.
        if print_progress:
            print('\r%d / %d' % (num_items, num_items))
        if not return_as_list:
            out_arrays = out_arrays[0] if len(out_arrays) == 1 else tuple(
                out_arrays)
        return out_arrays
Example #59
0
def evaluate(n_token, cutoffs, ps_device):
    ##### Get input function and model function
    eval_input_fn, eval_record_info = data_utils.get_input_fn(
        record_info_dir=FLAGS.record_info_dir,
        split=FLAGS.eval_split,
        per_host_bsz=FLAGS.eval_batch_size,
        tgt_len=FLAGS.tgt_len,
        num_core_per_host=FLAGS.num_core_per_host,
        num_hosts=1,
        use_tpu=False)

    num_batch = eval_record_info["num_batch"]
    if FLAGS.max_eval_batch > 0:
        num_batch = FLAGS.max_eval_batch
    tf.logging.info("num of batches {}".format(num_batch))

    ##### Create computational graph
    eval_set = eval_input_fn({
        "batch_size": FLAGS.eval_batch_size,
        "data_dir": FLAGS.data_dir
    })

    input_feed, label_feed = eval_set.make_one_shot_iterator().get_next()

    inputs = tf.split(input_feed, FLAGS.num_core_per_host, 0)
    labels = tf.split(label_feed, FLAGS.num_core_per_host, 0)

    per_core_bsz = FLAGS.eval_batch_size // FLAGS.num_core_per_host
    tower_mems, tower_losses, tower_new_mems = [], [], []

    for i in range(FLAGS.num_core_per_host):
        with tf.device(assign_to_gpu(i, ps_device)), \
             tf.variable_scope(tf.get_variable_scope(), reuse=tf.AUTO_REUSE):
            mems_i = [
                tf.placeholder(tf.float32,
                               [FLAGS.mem_len, per_core_bsz, FLAGS.d_model])
                for _ in range(FLAGS.n_layer)
            ]

            loss_i, new_mems_i = single_core_graph(n_token=n_token,
                                                   cutoffs=cutoffs,
                                                   is_training=False,
                                                   inp=inputs[i],
                                                   tgt=labels[i],
                                                   mems=mems_i)

            tower_mems.append(mems_i)
            tower_losses.append(loss_i)
            tower_new_mems.append(new_mems_i)

    ## sum losses across towers
    if len(tower_losses) > 1:
        loss = tf.add_n(tower_losses) / len(tower_losses)
    else:
        loss = tower_losses[0]

    ##### Evaluation loop
    tower_mems_np = [[
        np.zeros([FLAGS.mem_len, per_core_bsz, FLAGS.d_model],
                 dtype=np.float32) for layer in range(FLAGS.n_layer)
    ] for core in range(FLAGS.num_core_per_host)]

    saver = tf.train.Saver()

    with tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) as sess:
        sess.run(tf.global_variables_initializer())

        if FLAGS.eval_ckpt_path is None:
            eval_ckpt_path = tf.train.latest_checkpoint(FLAGS.model_dir)
        else:
            eval_ckpt_path = FLAGS.eval_ckpt_path
        tf.logging.info("Evaluate {}".format(eval_ckpt_path))
        saver.restore(sess, eval_ckpt_path)

        fetches = [loss, tower_new_mems, tf.size(label_feed)]

        format_str = "  >> processing batch {{:{0}d}}/{{:{0}d}} ..".format(
            len(str(num_batch)))

        total_loss, total_cnt = 0, 0
        for step in range(num_batch):
            if step % (num_batch // 10) == 0:
                tf.logging.info(format_str.format(step, num_batch))

            feed_dict = {}
            for i in range(FLAGS.num_core_per_host):
                for m, m_np in zip(tower_mems[i], tower_mems_np[i]):
                    feed_dict[m] = m_np

            fetched = sess.run(fetches, feed_dict=feed_dict)

            loss_np, tower_mems_np, cnt_np = fetched[:3]
            total_loss += loss_np * cnt_np
            total_cnt += cnt_np

        avg_loss = total_loss / total_cnt
        tf.logging.info("| loss {:.2f} | pplx {:>7.2f}, bpc {:>7.4f}".format(
            avg_loss, math.exp(avg_loss), avg_loss / math.log(2)))
Example #60
0
def pretrain_model(epochs, batch_size, train_steps_per_epoch, save_dir):
    # step 1: prepare dataset
    train_data, test_data = load_data()
    pipeline = fe.Pipeline(
        train_data=train_data,
        batch_size=batch_size,
        ops=[
            PadIfNeeded(min_height=40,
                        min_width=40,
                        image_in="x",
                        image_out="x"),

            # augmentation 1
            RandomCrop(32, 32, image_in="x", image_out="x_aug"),
            Sometimes(HorizontalFlip(image_in="x_aug", image_out="x_aug"),
                      prob=0.5),
            Sometimes(ColorJitter(inputs="x_aug",
                                  outputs="x_aug",
                                  brightness=0.8,
                                  contrast=0.8,
                                  saturation=0.8,
                                  hue=0.2),
                      prob=0.8),
            Sometimes(ToGray(inputs="x_aug", outputs="x_aug"), prob=0.2),
            Sometimes(GaussianBlur(inputs="x_aug",
                                   outputs="x_aug",
                                   blur_limit=(3, 3),
                                   sigma_limit=(0.1, 2.0)),
                      prob=0.5),
            ToFloat(inputs="x_aug", outputs="x_aug"),

            # augmentation 2
            RandomCrop(32, 32, image_in="x", image_out="x_aug2"),
            Sometimes(HorizontalFlip(image_in="x_aug2", image_out="x_aug2"),
                      prob=0.5),
            Sometimes(ColorJitter(inputs="x_aug2",
                                  outputs="x_aug2",
                                  brightness=0.8,
                                  contrast=0.8,
                                  saturation=0.8,
                                  hue=0.2),
                      prob=0.8),
            Sometimes(ToGray(inputs="x_aug2", outputs="x_aug2"), prob=0.2),
            Sometimes(GaussianBlur(inputs="x_aug2",
                                   outputs="x_aug2",
                                   blur_limit=(3, 3),
                                   sigma_limit=(0.1, 2.0)),
                      prob=0.5),
            ToFloat(inputs="x_aug2", outputs="x_aug2")
        ])

    # step 2: prepare network
    model_con, model_finetune = fe.build(model_fn=ResNet9,
                                         optimizer_fn=["adam", "adam"])
    network = fe.Network(ops=[
        LambdaOp(lambda x, y: tf.concat([x, y], axis=0),
                 inputs=["x_aug", "x_aug2"],
                 outputs="x_com"),
        ModelOp(model=model_con, inputs="x_com", outputs="y_com"),
        LambdaOp(lambda x: tf.split(x, 2, axis=0),
                 inputs="y_com",
                 outputs=["y_pred", "y_pred2"]),
        NTXentOp(arg1="y_pred",
                 arg2="y_pred2",
                 outputs=["NTXent", "logit", "label"]),
        UpdateOp(model=model_con, loss_name="NTXent")
    ])

    # step 3: prepare estimator
    traces = [
        Accuracy(true_key="label",
                 pred_key="logit",
                 mode="train",
                 output_name="contrastive_accuracy"),
        ModelSaver(model=model_con, save_dir=save_dir),
    ]
    estimator = fe.Estimator(pipeline=pipeline,
                             network=network,
                             epochs=epochs,
                             traces=traces,
                             train_steps_per_epoch=train_steps_per_epoch)
    estimator.fit()

    return model_con, model_finetune
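# --- Illustrative sketch, not part of the original snippet ---
# The concat/split pairing used by the LambdaOps above (assuming TensorFlow 2.x
# eager execution, as FastEstimator does): the two augmented views are stacked along
# the batch axis, run through one model call, and tf.split recovers the paired
# embeddings for the NT-Xent loss. A reshape stands in for the encoder here.
import tensorflow as tf

x_aug = tf.random.normal([8, 32, 32, 3])
x_aug2 = tf.random.normal([8, 32, 32, 3])
x_com = tf.concat([x_aug, x_aug2], axis=0)    # (16, 32, 32, 3)
y_com = tf.reshape(x_com, [16, -1])           # stand-in for the encoder output
y_pred, y_pred2 = tf.split(y_com, 2, axis=0)  # two (8, 3072) halves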