示例#1
0
 def is_duplicate(endpoints):
     """Threshold the mean L1 difference between two RGB entries.

     Compares `endpoints['rgb'][1]` against `endpoints['rgb'][0]` and returns
     a boolean tensor that is True when their mean absolute difference is
     GREATER than `params.input.duplicates_filter_threshold`.
     NOTE(review): despite the name, True therefore means "different enough";
     presumably this is meant as a keep-predicate for a filter -- confirm
     against the caller.
     """
     rgb_pair = endpoints['rgb']
     mean_l1_diff = tf.reduce_mean(tf.abs(rgb_pair[1] - rgb_pair[0]))
     threshold = params.input.duplicates_filter_threshold
     return tf.greater(mean_l1_diff, threshold)
    def next_timestep(self, state, action):
        '''Advance the boat dynamics by one timestep of length self.train_dt.

        The first dimension of every tensor is the batch (self.batch_size),
        so all samples are integrated in parallel.

        Params
        ======
            state: rank-2 tensor; columns 0:3 are eta and columns 3:6 are
                upsilon (presumably pose [x, y, heading] and body-frame
                velocities [u, v, r] -- confirm with the caller)
            action: rank-2 tensor; column 0 is Tport and column 1 is Tstbd
                (presumably port / starboard thrust commands)
        Returns
        ======
            next_state: rank-2 tensor [batch_size, 6], the concatenation
                [eta, upsilon] after one forward-Euler step; eta[:, 2] is
                shifted by one full turn whenever its magnitude exceeds pi
            reward: the value returned by self.get_reward(next_state)
        '''
        eta = state[:, 0:3]
        upsilon = state[:, 3:6]
        Tport = action[:, 0]
        Tstbd = action[:, 1]

        zeros = tf.zeros([self.batch_size], dtype=tf.float32)
        ones = tf.ones([self.batch_size], dtype=tf.float32)

        # Surge damping coefficients switch at |u| = 1.2.
        Xu = tf.where(tf.less(tf.abs(upsilon[:, 0]), 1.2), tf.constant(-0.25, dtype=tf.float32, shape=[
                      self.batch_size]), tf.constant(64.55, dtype=tf.float32, shape=[self.batch_size]))
        Xuu = tf.where(tf.less(tf.abs(upsilon[:, 0]), 1.2), tf.constant(0.0, dtype=tf.float32, shape=[
            self.batch_size]), tf.constant(-70.92, dtype=tf.float32, shape=[self.batch_size]))

        # Velocity-dependent linear damping coefficients.
        Yv = 0.5*(-40*1000*tf.abs(upsilon[:, 1])) * \
            (1.1+0.0045*(1.01/0.09) - 0.1*(0.27/0.09)+0.016*(tf.pow((0.27/0.09), 2)))
        Yr = 6*(-3.141592*1000) * \
            tf.sqrt(tf.pow(upsilon[:, 0], 2)+tf.pow(upsilon[:, 1], 2))*0.09*0.09*1.01
        Nv = 0.06*(-3.141592*1000) * \
            tf.sqrt(tf.pow(upsilon[:, 0], 2)+tf.pow(upsilon[:, 1], 2))*0.09*0.09*1.01
        Nr = 0.02*(-3.141592*1000) * \
            tf.sqrt(tf.pow(upsilon[:, 0], 2)+tf.pow(upsilon[:, 1], 2))*0.09*0.09*1.01*1.01

        # Constant 3x3 mass matrix built from self.boat.physics.
        M = tf.constant([[self.boat.physics.m - self.boat.physics.X_u_dot, 0, 0],
                         [0, self.boat.physics.m - self.boat.physics.Y_v_dot,
                             0 - self.boat.physics.Y_r_dot],
                         [0, 0 - self.boat.physics.N_v_dot, self.boat.physics.Iz - self.boat.physics.N_r_dot]])

        # Thrust vector, reshaped to a batch of column vectors.
        T = tf.stack([Tport + self.boat.physics.c*Tstbd, zeros, 0.5 *
                      self.boat.physics.B*(Tport - self.boat.physics.c*Tstbd)], axis=1)
        T = tf.reshape(T, [self.batch_size, 3, 1])

        # The 3x3 matrices below are stacked as [3, 3, batch] and then
        # transposed to [batch, 3, 3] for batched matmul.
        CRB = tf.stack([[zeros, zeros, -self.boat.physics.m * upsilon[:, 1]],
                        [zeros, zeros, self.boat.physics.m * upsilon[:, 0]],
                        [self.boat.physics.m * upsilon[:, 1], -self.boat.physics.m * upsilon[:, 0], zeros]])
        CRB = tf.transpose(CRB, perm=[2, 0, 1])

        CA = tf.stack([[zeros, zeros, 2 * ((self.boat.physics.Y_v_dot*upsilon[:, 1]) + ((self.boat.physics.Y_r_dot + self.boat.physics.N_v_dot)/2) * upsilon[:, 2])],
                       [zeros, zeros, -self.boat.physics.X_u_dot * self.boat.physics.m * upsilon[:, 0]],
                       [2*(((-self.boat.physics.Y_v_dot) * upsilon[:, 1]) - ((self.boat.physics.Y_r_dot+self.boat.physics.N_v_dot)/2) * upsilon[:, 2]), self.boat.physics.X_u_dot * self.boat.physics.m * upsilon[:, 0], zeros]])
        CA = tf.transpose(CA, perm=[2, 0, 1])

        C = CRB + CA

        Dl = tf.stack([[-Xu, zeros, zeros],
                       [zeros, -Yv, -Yr],
                       [zeros, -Nv, -Nr]])
        Dl = tf.transpose(Dl, perm=[2, 0, 1])

        Dn = tf.stack([[Xuu * tf.abs(upsilon[:, 0]), zeros, zeros],
                       [zeros, self.boat.physics.Yvv * tf.abs(upsilon[:, 1]) + self.boat.physics.Yvr * tf.abs(upsilon[:, 2]), self.boat.physics.Yrv *
                           tf.abs(upsilon[:, 1]) + self.boat.physics.Yrr * tf.abs(upsilon[:, 2])],
                       [zeros, self.boat.physics.Nvv * tf.abs(upsilon[:, 1]) + self.boat.physics.Nvr * tf.abs(upsilon[:, 2]), self.boat.physics.Nrv * tf.abs(upsilon[:, 1]) + self.boat.physics.Nrr * tf.abs(upsilon[:, 2])]])
        Dn = tf.transpose(Dn, perm=[2, 0, 1])

        D = Dl - Dn

        # Solve M * upsilon_dot = T - C*upsilon - D*upsilon.
        upsilon = tf.reshape(upsilon, [self.batch_size, 3, 1])
        upsilon_dot = tf.matmul(tf.linalg.inv(
            M), (T - tf.matmul(C, upsilon) - tf.matmul(D, upsilon)))

        upsilon = (self.train_dt) * upsilon_dot + upsilon  # integral

        # Rotation by the angle eta[:, 2].
        J = tf.stack([[tf.cos(eta[:, 2]), -tf.sin(eta[:, 2]), zeros],
                      [tf.sin(eta[:, 2]), tf.cos(eta[:, 2]), zeros],
                      [zeros, zeros, ones]])
        J = tf.transpose(J, perm=[2, 0, 1])

        eta_dot = tf.matmul(J, upsilon)  # rotate the velocities by the heading
        eta = tf.reshape(eta, [self.batch_size, 3, 1])
        eta = (self.train_dt)*eta_dot + eta  # integral

        # Wrap the heading element-wise: when |angle| > pi, shift it by one
        # full turn towards zero. tf.where is used instead of tf.cond because
        # the predicate is a per-sample tensor, not a scalar. tf.sign avoids
        # a 0/0 in the unselected branch when the angle is exactly zero.
        heading = eta[:, 2:3, :]
        wrapped_heading = tf.where(
            tf.greater(tf.abs(heading), np.pi),
            tf.sign(heading) * (tf.abs(heading) - 2 * np.pi),
            heading)
        eta = tf.concat([eta[:, 0:2, :], wrapped_heading], axis=1)

        eta = tf.reshape(eta, [self.batch_size, 3])
        upsilon = tf.reshape(upsilon, [self.batch_size, 3])
        next_state = tf.concat([eta, upsilon], axis=1)
        reward = self.get_reward(next_state)
        return next_state, reward
示例#3
0
 def compute_error(real, fake, mask):
   """Return the mean over all elements of `mask * |fake - real|`."""
   masked_abs_diff = mask * tf.abs(fake - real)
   return tf.reduce_mean(masked_abs_diff)
示例#4
0
def EffectiveSampleSize(states,
                        filter_beyond_lag=300,
                        filter_threshold=0.05,
                        center=True,
                        normalize=True):
  """Estimate the effective sample size of a single `Tensor` of states.

  The auto-correlation along axis 0 is obtained from
  `SanitizedAutoCorrelation` (truncated at `filter_beyond_lag`), averaged
  over axis 1, optionally zeroed from the first lag whose magnitude drops
  below `filter_threshold`, and then combined as

    ESS = N / (1 + 2 * Sum_{k>=1} (N - k) / N * R[k])

  where N is the size of `states` along axis 0.

  Args:
    states: value convertible to a `Tensor`; axis 0 indexes the samples.
    filter_beyond_lag: forwarded as `max_lags` to `SanitizedAutoCorrelation`.
    filter_threshold: if not None, lags at and after the first one with
      |R[k]| below this value are ignored.
    center: forwarded to `SanitizedAutoCorrelation`.
    normalize: forwarded to `SanitizedAutoCorrelation`.

  Returns:
    `Tensor` holding the ESS estimate (axis 0 of the averaged
    auto-correlation is reduced away).
  """

  def _count_along(tensor, axis=None):
    """Number of elements of `tensor` (along `axis` if given), in its dtype."""
    if axis is not None:
      dims = tf.gather(tf.shape(tensor), axis)
      return tf.cast(tf.reduce_prod(dims), tensor.dtype)
    return tf.cast(tf.size(tensor), tensor.dtype)

  with tf.name_scope(
      "effective_sample_size_single_state",
      values=[states, filter_beyond_lag, filter_threshold]):

    states = tf.convert_to_tensor(states, name="states")
    state_dtype = states.dtype

    # With filter_beyond_lag == None the full lag sequence is returned.
    acf = SanitizedAutoCorrelation(
        states,
        axis=0,
        center=center,
        normalize=normalize,
        max_lags=filter_beyond_lag)
    acf = tf.reduce_mean(acf, 1)

    if filter_threshold is not None:
      filter_threshold = tf.convert_to_tensor(
          filter_threshold, dtype=state_dtype, name="filter_threshold")
      # Build a prefix mask along axis 0 that looks like [1, ..., 1, 0, ...]:
      # it is 1 until the first lag whose |acf| falls below the threshold
      # and 0 from that lag onwards.
      # Example with acf = [1, 0.5, 0.0, 0.3] and threshold 0.2:
      #   below threshold   -> [0, 0, 1, 0]
      #   running count     -> [0, 0, 1, 1]
      #   max(1 - count, 0) -> [1, 1, 0, 0]
      below = tf.cast(tf.abs(acf) < filter_threshold, dtype=state_dtype)
      below_so_far = tf.cumsum(below, axis=0)
      keep = tf.maximum(1. - below_so_far, 0.)
      acf *= keep

    # Weight (N - k) / N for every lag k, reshaped to [M, 1, ..., 1] so it
    # broadcasts against acf.
    num_samples = _count_along(states, axis=0)
    lags = tf.range(0., _count_along(acf, axis=0))
    lag_weights = (num_samples - lags) / num_samples

    static_rank = acf.shape.ndims
    if static_rank is None:
      # Rank unknown at graph-construction time: build the shape dynamically.
      target_shape = tf.concat(
          ([-1], tf.ones([tf.rank(acf) - 1], dtype=tf.int32)), axis=0)
    else:
      target_shape = [-1] + [1] * (static_rank - 1)
    lag_weights = tf.reshape(lag_weights, target_shape)

    # Lag 0 is skipped in the sum; it is accounted for by the leading 1.0.
    weighted_acf_sum = tf.reduce_sum(
        lag_weights[1:, Ellipsis] * acf[1:, Ellipsis], axis=0)
    return num_samples / (1.0 + 2 * weighted_acf_sum)
示例#5
0
tf.disable_v2_behavior()
import input_data

#Build the Training Set

mnist = input_data.read_data_sets("/tmp/data/", one_hot=True)

train_pixels, train_list_values = mnist.train.next_batch(100)
test_pixels, test_list_of_values = mnist.test.next_batch(10)

train_pixel_tensor = tf.placeholder("float", [None, 784])
test_pixel_tensor = tf.placeholder("float", [784])

# L1 (Manhattan) distance between the single test image and every training
# image; broadcasting subtracts the [784] test vector from each [784] row.

distance = tf.reduce_sum(tf.abs(train_pixel_tensor - test_pixel_tensor),
                         axis=1)

# Index of the nearest training image.
pred = tf.argmin(distance, axis=0)

# Testing and algorithm evaluation

accuracy = 0.
init = tf.global_variables_initializer()
with tf.Session() as sess:
    sess.run(init)
    for i in range(len(test_list_of_values)):
        nn_index = sess.run(pred,
                            feed_dict={
                                train_pixel_tensor: train_pixels,
                                test_pixel_tensor: test_pixels[i, :]
示例#6
0
def tf_blend(X, y, type_to_idx, lr, steps, do_individual_scores=True):
    """
    Does linear combination of solutions, where the weights
    are positive and sum to 1.

    The blend weights are the softmax of a trainable logit vector. They are
    fitted with Adam on a stratified 50% split by minimising the mean over
    classes of log(mean absolute error within the class). The final weights
    come from the average of the logits recorded over the last epochs.

    Args:
        X: array of shape (n_features, n_samples); one row per solution.
        y: array of shape (n_samples,) with the targets.
        type_to_idx: mapping from a type to the sample indices of that type;
            used both for the per-class loss and for stratification.
        lr: learning rate for the Adam optimizer.
        steps: number of passes over the training split.
        do_individual_scores: if True, also score every single solution.

    Returns:
        ensemble_weights: array of shape (n_features, 1).
        scores: array whose first row is the per-class loss of the ensemble
            on the held-out split, followed (if requested) by one row per
            individual solution.
    """
    x = X.T
    n_samples, n_features = x.shape
    n_classes = len(type_to_idx)
    classes = np.zeros((n_samples, n_classes))
    strat = np.zeros(n_samples, dtype=int)
    # One-hot class membership, plus integer labels to stratify the split
    for i, idx in enumerate(type_to_idx.values()):
        classes[idx, i] = 1
        strat[idx] = i

    # Initialize weights to zero to avoid bias
    logits_init = np.zeros((n_features, 1))

    # Reset graph
    tf.reset_default_graph()
    # Tensorflow placeholders and variables
    classes_tf = tf.placeholder(tf.float32, [None, n_classes])
    x_tf = tf.placeholder(tf.float32, [None, n_features])
    logits_tf = tf.Variable(logits_init, dtype=tf.float32)
    W = tf.nn.softmax(logits_tf, axis=0)
    y_pred = tf.matmul(x_tf, W)
    y_tf = tf.placeholder(tf.float32, [None, 1])
    abs_diff = tf.abs(y_tf - y_pred)
    class_diff = abs_diff * classes_tf
    cost = tf.reduce_sum(class_diff, axis=0)
    # The 1e-9 terms guard against empty classes and log(0)
    mean_cost = cost / (tf.reduce_sum(classes_tf, axis=0) + 1e-9)
    log_cost = tf.math.log(mean_cost + 1e-9) / n_classes
    total_cost = tf.reduce_sum(log_cost)

    test_size = 0.50
    train_step = tf.train.AdamOptimizer(lr).minimize(total_cost)

    # Keep track of progress
    scores = []
    running_logits = []

    # Get stratified train, test split
    train, test = sklearn.model_selection.train_test_split(
        np.arange(n_samples),
        stratify=strat,
        test_size=test_size,
        shuffle=True,
        random_state=42)

    # Ten mini-batches per epoch, as an integer count: the previous float
    # arithmetic could round to 9 and produced empty batches on tiny splits.
    n_batches = max(1, min(10, train.size))

    average_steps = 50 if steps > 100 else 20

    init = tf.global_variables_initializer()
    with tf.Session() as sess:
        sess.run(init)
        test_feed = {
            x_tf: x[test],
            y_tf: y[test, None],
            classes_tf: classes[test]
        }
        for i in range(steps):
            for batch in np.array_split(train, n_batches):
                feed = {
                    x_tf: x[batch],
                    y_tf: y[batch, None],
                    classes_tf: classes[batch]
                }
                sess.run(train_step, feed_dict=feed)
            np.random.shuffle(train)
            # The logits after the very first epoch are not recorded
            if i > 0:
                running_logits.append(sess.run(logits_tf))
        # Average logits and get score and weights
        if running_logits:
            ensemble_logits = np.mean(running_logits[-average_steps:], 0)
        else:
            # steps <= 1: nothing was recorded, use the current logits
            ensemble_logits = sess.run(logits_tf)
        ensemble_loss, ensemble_weights = sess.run(
            [log_cost, W],
            feed_dict={**test_feed, logits_tf: ensemble_logits})
        scores.append(ensemble_loss)
        # Do individual scores
        if do_individual_scores:
            test_weights = np.zeros((n_features, 1))
            # Calculate loss for individual solutions by overriding W with
            # a one-hot weight vector
            for j in range(n_features):
                test_weights[:] = 0
                test_weights[j] = 1
                test_loss = sess.run(
                    log_cost, feed_dict={**test_feed, W: test_weights})
                scores.append(test_loss)

    return ensemble_weights, np.asarray(scores)
示例#7
0
def sinc(x, threshold=1e-20):
    """Normalized sinc, sin(pi * x) / (pi * x), with its peak at zero.

    Inputs whose magnitude is below `threshold` are replaced by `threshold`
    before the division so the quotient stays finite at x = 0.
    """
    x = tf_float32(x)
    too_small = tf.abs(x) < threshold
    floor_value = threshold * tf.ones_like(x)
    safe_x = tf.where(too_small, floor_value, x)
    phase = np.pi * safe_x
    return tf.sin(phase) / phase
示例#8
0
    def build_model(self,
                    is_training=True,
                    inst_norm=False,
                    no_target_source=False):
        """Build the GAN graph and attach its handles to `self`.

        Creates the input placeholders, runs the generator, discriminator
        and encoder, assembles the discriminator / generator losses and
        their summaries, and stores the results as `self.input_handle`,
        `self.loss_handle`, `self.eval_handle` and `self.summary_handle`.

        Args:
            is_training: forwarded to the generator, discriminator and
                encoder.
            inst_norm: forwarded to the generator.
            no_target_source: if True, extra loss terms are added from the
                `no_target_*` placeholders (examples without a target
                image), and `d_loss` / `g_loss` are redefined to include
                them.

        Returns:
            None; the graph handles are set as attributes on `self`.
        """
        # Source and target images arrive concatenated along the channel axis.
        real_data = tf.placeholder(tf.float32, [
            self.batch_size, self.input_width, self.input_width,
            self.input_filters + self.output_filters
        ],
                                   name='real_A_and_B_images')
        embedding_ids = tf.placeholder(tf.int64,
                                       shape=None,
                                       name="embedding_ids")
        no_target_data = tf.placeholder(tf.float32, [
            self.batch_size, self.input_width, self.input_width,
            self.input_filters + self.output_filters
        ],
                                        name='no_target_A_and_B_images')
        no_target_ids = tf.placeholder(tf.int64,
                                       shape=None,
                                       name="no_target_embedding_ids")

        # target images: the first `input_filters` channels
        real_B = real_data[:, :, :, :self.input_filters]
        # source images: the remaining `output_filters` channels
        real_A = real_data[:, :, :, self.input_filters:self.input_filters +
                           self.output_filters]

        embedding = init_embedding(self.embedding_num, self.embedding_dim)
        fake_B, encoded_real_A = self.generator(real_A,
                                                embedding,
                                                embedding_ids,
                                                is_training=is_training,
                                                inst_norm=inst_norm)
        real_AB = tf.concat([real_A, real_B], 3)
        fake_AB = tf.concat([real_A, fake_B], 3)

        # Note it is not possible to set reuse flag back to False
        # initialize all variables before setting reuse to True
        real_D, real_D_logits, real_category_logits = self.discriminator(
            real_AB, is_training=is_training, reuse=False)
        fake_D, fake_D_logits, fake_category_logits = self.discriminator(
            fake_AB, is_training=is_training, reuse=True)

        # encoding constant loss
        # this loss assumes that the generated image and the real image
        # should reside in the same space and be close to each other
        encoded_fake_B = self.encoder(fake_B, is_training, reuse=True)[0]
        const_loss = (tf.reduce_mean(
            tf.square(encoded_real_A - encoded_fake_B))) * self.Lconst_penalty

        # category loss: one-hot labels built from the embedding ids
        true_labels = tf.reshape(tf.one_hot(indices=embedding_ids,
                                            depth=self.embedding_num),
                                 shape=[self.batch_size, self.embedding_num])
        real_category_loss = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(
                logits=real_category_logits, labels=true_labels))
        fake_category_loss = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(
                logits=fake_category_logits, labels=true_labels))
        category_loss = self.Lcategory_penalty * (real_category_loss +
                                                  fake_category_loss)

        # binary real/fake loss
        d_loss_real = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(
                logits=real_D_logits, labels=tf.ones_like(real_D)))
        d_loss_fake = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(
                logits=fake_D_logits, labels=tf.zeros_like(fake_D)))
        # L1 loss between real and generated images
        l1_loss = self.L1_penalty * tf.reduce_mean(tf.abs(fake_B - real_B))
        # total variation loss: L2 of neighbouring-pixel differences along
        # both spatial axes
        # NOTE(review): the slices use self.output_width -- assumes fake_B's
        # spatial size equals it; confirm against the generator.
        width = self.output_width
        tv_loss = (
            tf.nn.l2_loss(fake_B[:, 1:, :, :] - fake_B[:, :width - 1, :, :]) /
            width +
            tf.nn.l2_loss(fake_B[:, :, 1:, :] - fake_B[:, :, :width - 1, :]) /
            width) * self.Ltv_penalty

        # maximize the chance that the generator fools the discriminator
        cheat_loss = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(
                logits=fake_D_logits, labels=tf.ones_like(fake_D)))

        d_loss = d_loss_real + d_loss_fake + category_loss / 2.0
        g_loss = cheat_loss + l1_loss + self.Lcategory_penalty * fake_category_loss + const_loss + tv_loss

        if no_target_source:
            # no_target sources are examples that don't have corresponding target images
            # however, except for the L1 loss, we can compute category, binary and constant losses with those examples
            # it is useful when the discriminator gets saturated and d_loss drops to near zero
            # those data can be used as an additional source of losses to break the saturation
            no_target_A = no_target_data[:, :, :, self.
                                         input_filters:self.input_filters +
                                         self.output_filters]
            no_target_B, encoded_no_target_A = self.generator(
                no_target_A,
                embedding,
                no_target_ids,
                is_training=is_training,
                inst_norm=inst_norm,
                reuse=True)
            no_target_labels = tf.reshape(
                tf.one_hot(indices=no_target_ids, depth=self.embedding_num),
                shape=[self.batch_size, self.embedding_num])
            no_target_AB = tf.concat([no_target_A, no_target_B], 3)
            no_target_D, no_target_D_logits, no_target_category_logits = self.discriminator(
                no_target_AB, is_training=is_training, reuse=True)
            encoded_no_target_B = self.encoder(no_target_B,
                                               is_training,
                                               reuse=True)[0]
            no_target_const_loss = tf.reduce_mean(
                tf.square(encoded_no_target_A -
                          encoded_no_target_B)) * self.Lconst_penalty
            no_target_category_loss = tf.reduce_mean(
                tf.nn.sigmoid_cross_entropy_with_logits(
                    logits=no_target_category_logits,
                    labels=no_target_labels)) * self.Lcategory_penalty

            d_loss_no_target = tf.reduce_mean(
                tf.nn.sigmoid_cross_entropy_with_logits(
                    logits=no_target_D_logits,
                    labels=tf.zeros_like(no_target_D)))
            cheat_loss += tf.reduce_mean(
                tf.nn.sigmoid_cross_entropy_with_logits(
                    logits=no_target_D_logits,
                    labels=tf.ones_like(no_target_D)))
            # Both totals are redefined here; the earlier definitions are
            # replaced, not extended.
            d_loss = d_loss_real + d_loss_fake + d_loss_no_target + (
                category_loss + no_target_category_loss) / 3.0
            g_loss = cheat_loss / 2.0 + l1_loss + \
                     (self.Lcategory_penalty * fake_category_loss + no_target_category_loss) / 2.0 + \
                     (const_loss + no_target_const_loss) / 2.0 + tv_loss

        # Scalar summaries for every loss component.
        d_loss_real_summary = tf.summary.scalar("d_loss_real", d_loss_real)
        d_loss_fake_summary = tf.summary.scalar("d_loss_fake", d_loss_fake)
        category_loss_summary = tf.summary.scalar("category_loss",
                                                  category_loss)
        cheat_loss_summary = tf.summary.scalar("cheat_loss", cheat_loss)
        l1_loss_summary = tf.summary.scalar("l1_loss", l1_loss)
        fake_category_loss_summary = tf.summary.scalar("fake_category_loss",
                                                       fake_category_loss)
        const_loss_summary = tf.summary.scalar("const_loss", const_loss)
        d_loss_summary = tf.summary.scalar("d_loss", d_loss)
        g_loss_summary = tf.summary.scalar("g_loss", g_loss)
        tv_loss_summary = tf.summary.scalar("tv_loss", tv_loss)

        # Separate merged summaries for the discriminator and the generator.
        d_merged_summary = tf.summary.merge([
            d_loss_real_summary, d_loss_fake_summary, category_loss_summary,
            d_loss_summary
        ])
        g_merged_summary = tf.summary.merge([
            cheat_loss_summary, l1_loss_summary, fake_category_loss_summary,
            const_loss_summary, g_loss_summary, tv_loss_summary
        ])

        # expose useful nodes in the graph as handles globally
        input_handle = InputHandle(real_data=real_data,
                                   embedding_ids=embedding_ids,
                                   no_target_data=no_target_data,
                                   no_target_ids=no_target_ids)

        loss_handle = LossHandle(d_loss=d_loss,
                                 g_loss=g_loss,
                                 const_loss=const_loss,
                                 l1_loss=l1_loss,
                                 category_loss=category_loss,
                                 cheat_loss=cheat_loss,
                                 tv_loss=tv_loss)

        eval_handle = EvalHandle(encoder=encoded_real_A,
                                 generator=fake_B,
                                 target=real_B,
                                 source=real_A,
                                 embedding=embedding)

        summary_handle = SummaryHandle(d_merged=d_merged_summary,
                                       g_merged=g_merged_summary)

        # those operations will be shared, so we need
        # to make them visible globally
        setattr(self, "input_handle", input_handle)
        setattr(self, "loss_handle", loss_handle)
        setattr(self, "eval_handle", eval_handle)
        setattr(self, "summary_handle", summary_handle)
示例#9
0
def embedding_to_padding(emb):
    """Return 1.0 where an embedding vector is all zeros, else 0.0.

    The absolute values of `emb` are summed over the last axis (which is
    kept with size 1); positions whose sum equals zero are flagged as
    padding.
    """
    abs_total = tf.reduce_sum(tf.abs(emb), axis=-1, keep_dims=True)
    is_all_zero = tf.equal(abs_total, 0.0)
    return tf.to_float(is_all_zero)
  def _build_single_q_network(self, observations, head, state_t, state_tp1,
                              done_mask, reward_t, error_weight):
    """Builds the computational graph for a single Q network.

    Briefly, this part is calculating the following two quantities:
    1. q_value = q_fn(observations)
    2. td_error = q_fn(state_t) - reward_t - gamma * q_fn(state_tp1)
    The optimization target is to minimize the td_error.

    Args:
      observations: shape = [batch_size, hparams.fingerprint_length].
        The input of the Q function.
      head: shape = [1].
        The index of the head chosen for decision in bootstrap DQN.
      state_t: shape = [batch_size, hparams.fingerprint_length].
        The state at time step t.
      state_tp1: a list of tensors, with total number of batch_size,
        each has shape = [num_actions, hparams.fingerprint_length].
        Note that the num_actions can be different for each tensor.
        The state at time step t+1, tp1 is short for t plus 1.
      done_mask: shape = [batch_size, 1]
        Whether state_tp1 is the terminal state.
      reward_t: shape = [batch_size, 1]
        the reward at time step t.
      error_weight: shape = [batch_size, 1]
        weight for the loss.

    Returns:
      q_values: Tensor of [batch_size, 1]. The q values for the observations.
      td_error: Tensor of [batch_size, 1]. The TD error. When
        self.num_bootstrap_heads is set, this is the mean over axis 1 of
        the randomly head-masked TD error.
      weighted_error: Scalar Tensor. The mean of the Huber-style loss of
        the TD error weighted by error_weight.
      q_fn_vars: List of tf.Variables. The variables of q_fn when computing
        the q_values of state_t
      q_tp1_vars: List of tf.Variables. The variables of q_fn when computing
        the q_values of state_tp1

    """
    with tf.variable_scope('q_fn'):
      # q_value have shape [batch_size, 1].
      q_values = tf.gather(self.q_fn(observations), head, axis=-1)

    # calculating q_fn(state_t)
    # The Q network shares parameters with the action graph.
    with tf.variable_scope('q_fn', reuse=True):
      q_t = self.q_fn(state_t, reuse=True)
    q_fn_vars = tf.trainable_variables(scope=tf.get_variable_scope().name +
                                       '/q_fn')

    # calculating q_fn(state_tp1)
    # These live in a separate 'q_tp1' scope, so they are distinct variables
    # from the ones under 'q_fn'.
    with tf.variable_scope('q_tp1', reuse=tf.AUTO_REUSE):
      q_tp1 = [self.q_fn(s_tp1, reuse=tf.AUTO_REUSE) for s_tp1 in state_tp1]
    q_tp1_vars = tf.trainable_variables(scope=tf.get_variable_scope().name +
                                        '/q_tp1')

    if self.double_q:
      with tf.variable_scope('q_fn', reuse=True):
        q_tp1_online = [self.q_fn(s_tp1, reuse=True) for s_tp1 in state_tp1]
      if self.num_bootstrap_heads:
        num_heads = self.num_bootstrap_heads
      else:
        num_heads = 1
      # determine the action to choose based on online Q estimator.
      # Each index row is (argmax action, head index).
      q_tp1_online_idx = [
          tf.stack(
              [tf.argmax(q, axis=0),
               tf.range(num_heads, dtype=tf.int64)],
              axis=1) for q in q_tp1_online
      ]
      # use the index from max online q_values to compute the value
      # function
      v_tp1 = tf.stack(
          [tf.gather_nd(q, idx) for q, idx in zip(q_tp1, q_tp1_online_idx)],
          axis=0)
    else:
      v_tp1 = tf.stack([tf.reduce_max(q) for q in q_tp1], axis=0)

    # if s_{t+1} is the terminal state, we do not evaluate the Q value of
    # the state.
    q_tp1_masked = (1.0 - done_mask) * v_tp1

    q_t_target = reward_t + self.gamma * q_tp1_masked

    # stop gradient from flowing to the computation graph which computes
    # the Q value of s_{t+1}.
    # td_error has shape [batch_size, 1]
    td_error = q_t - tf.stop_gradient(q_t_target)

    # If use bootstrap, each head is trained with a different subset of the
    # training sample. Like the idea of dropout.
    if self.num_bootstrap_heads:
      head_mask = tf.keras.backend.random_binomial(
          shape=(1, self.num_bootstrap_heads), p=0.6)
      td_error = tf.reduce_mean(td_error * head_mask, axis=1)
    # The loss comes from a traditional trick in convex optimization:
    # http://web.stanford.edu/~boyd/cvxbook/.
    # See Chapter 6 pp. 298
    # It makes the optimization robust.
    # Specifically, the loss will use l1 instead of l2 loss when the td error
    # gets larger than 1.0. The l2 loss has the disadvantage that it has
    # the tendency to be dominated by outliers. In terms of estimation theory,
    # the asymptotic relative efficiency of the l1 loss estimator is better
    # for heavy-tailed distributions.
    errors = tf.where(
        tf.abs(td_error) < 1.0, tf.square(td_error) * 0.5,
        1.0 * (tf.abs(td_error) - 0.5))
    weighted_error = tf.reduce_mean(error_weight * errors)
    return q_values, td_error, weighted_error, q_fn_vars, q_tp1_vars
示例#11
0
    def __init__(
        self,
        *,
        scope,
        ob_space,
        ac_space,
        stochpol_fn,
        nsteps,
        nepochs=4,
        nminibatches=1,
        gamma=0.99,
        gamma_ext=0.99,
        lam=0.95,
        ent_coef=0,
        cliprange=0.2,
        max_grad_norm=1.0,
        vf_coef=1.0,
        lr=30e-5,
        adam_hps=None,
        testing=False,
        comm=None,
        comm_train=None,
        use_news=False,
        update_ob_stats_every_step=True,
        int_coeff=None,
        ext_coeff=None,
        obs_save_flag=False,
    ):
        """Set up communicators, placeholders, the PPO loss and the train op.

        All arguments are keyword-only.

        Args:
            scope: variable scope under which the policy and optimizer are
                built; also used to collect the trainable variables.
            ob_space, ac_space: stored on the instance.
            stochpol_fn: zero-argument callable returning the policy object.
            nsteps, nepochs, nminibatches, gamma, gamma_ext, lam, cliprange:
                stored on the instance.
            ent_coef: weight of the entropy bonus in the loss.
            max_grad_norm: global-norm clipping threshold for the gradients;
                a falsy value disables clipping.
            vf_coef: weight of the intrinsic and extrinsic value losses.
            lr: stored on the instance; the optimizer itself reads the
                learning rate from the `ph_lr` placeholder.
            adam_hps: extra keyword arguments for `MpiAdamOptimizer`.
            testing: stored on the instance; rank 0 of `comm` must not be
                testing.
            comm: MPI communicator used for logging when it has more than
                one rank; otherwise `MPI.COMM_SELF` is used.
            comm_train: MPI communicator used for training; defaults to the
                logging communicator.
            use_news, update_ob_stats_every_step, int_coeff, ext_coeff,
            obs_save_flag: stored on the instance.
        """
        self.lr = lr
        self.ext_coeff = ext_coeff
        self.int_coeff = int_coeff
        self.use_news = use_news
        self.update_ob_stats_every_step = update_ob_stats_every_step
        self.abs_scope = (tf.get_variable_scope().name + '/' +
                          scope).lstrip('/')
        self.testing = testing
        self.comm_log = MPI.COMM_SELF
        if comm is not None and comm.Get_size() > 1:
            self.comm_log = comm
            assert not testing or comm.Get_rank(
            ) != 0, "Worker number zero can't be testing"
        if comm_train is not None:
            self.comm_train, self.comm_train_size = comm_train, comm_train.Get_size(
            )
        else:
            self.comm_train, self.comm_train_size = self.comm_log, self.comm_log.Get_size(
            )
        self.is_log_leader = self.comm_log.Get_rank() == 0
        self.is_train_leader = self.comm_train.Get_rank() == 0
        self.obs_save_flag = obs_save_flag
        # Only the logging leader keeps the action/observation records.
        if self.is_log_leader:
            self.obs_rec = [{'acs': [], 'obs': []} for _ in range(100)]

        with tf.variable_scope(scope):
            self.best_ret = -np.inf
            self.local_best_ret = -np.inf
            self.rooms = []
            self.local_rooms = []
            self.scores = []
            self.ob_space = ob_space
            self.ac_space = ac_space
            self.stochpol = stochpol_fn()
            self.nepochs = nepochs
            self.cliprange = cliprange
            self.nsteps = nsteps
            self.nminibatches = nminibatches
            self.gamma = gamma
            self.gamma_ext = gamma_ext
            self.lam = lam
            self.adam_hps = adam_hps or dict()
            self.ph_adv = tf.placeholder(tf.float32, [None, None])
            self.ph_ret_int = tf.placeholder(tf.float32, [None, None])
            self.ph_ret_ext = tf.placeholder(tf.float32, [None, None])
            self.ph_oldnlp = tf.placeholder(tf.float32, [None, None])
            self.ph_oldvpred = tf.placeholder(tf.float32, [None, None])
            self.ph_lr = tf.placeholder(tf.float32, [])
            self.ph_lr_pred = tf.placeholder(tf.float32, [])
            self.ph_cliprange = tf.placeholder(tf.float32, [])

            #Define loss.
            neglogpac = self.stochpol.pd_opt.neglogp(self.stochpol.ph_ac)
            entropy = tf.reduce_mean(self.stochpol.pd_opt.entropy())
            vf_loss_int = (0.5 * vf_coef) * tf.reduce_mean(
                tf.square(self.stochpol.vpred_int_opt - self.ph_ret_int))
            vf_loss_ext = (0.5 * vf_coef) * tf.reduce_mean(
                tf.square(self.stochpol.vpred_ext_opt - self.ph_ret_ext))
            vf_loss = vf_loss_int + vf_loss_ext
            ratio = tf.exp(self.ph_oldnlp - neglogpac)  # p_new / p_old
            negadv = -self.ph_adv
            # Clipped surrogate objective: the pessimistic (larger) of the
            # unclipped and clipped losses.
            pg_losses1 = negadv * ratio
            pg_losses2 = negadv * tf.clip_by_value(
                ratio, 1.0 - self.ph_cliprange, 1.0 + self.ph_cliprange)
            pg_loss = tf.reduce_mean(tf.maximum(pg_losses1, pg_losses2))
            ent_loss = (-ent_coef) * entropy
            approxkl = .5 * tf.reduce_mean(
                tf.square(neglogpac - self.ph_oldnlp))
            maxkl = .5 * tf.reduce_max(tf.square(neglogpac - self.ph_oldnlp))
            # Fraction of samples whose ratio left the clipping interval.
            clipfrac = tf.reduce_mean(
                tf.to_float(tf.greater(tf.abs(ratio - 1.0),
                                       self.ph_cliprange)))
            loss = pg_loss + ent_loss + vf_loss + self.stochpol.aux_loss

            #Create optimizer.
            params = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                       scope=self.abs_scope)
            logger.info("PPO: using MpiAdamOptimizer connected to %i peers" %
                        self.comm_train_size)
            trainer = MpiAdamOptimizer(self.comm_train,
                                       learning_rate=self.ph_lr,
                                       **self.adam_hps)
            grads_and_vars = trainer.compute_gradients(loss, params)
            grads, vars = zip(*grads_and_vars)
            # Measure the norm before clipping so the reported "gradnorm"
            # stays the raw gradient norm.
            global_grad_norm = tf.global_norm(grads)
            if max_grad_norm:
                # Keep the clipped gradients; previously the result was
                # discarded, so max_grad_norm had no effect on training.
                grads, _grad_norm = tf.clip_by_global_norm(
                    grads, max_grad_norm, use_norm=global_grad_norm)
            grads_and_vars = list(zip(grads, vars))
            self._train = trainer.apply_gradients(grads_and_vars)

        #Quantities for reporting.
        self._losses = [
            loss, pg_loss, vf_loss, entropy, clipfrac, approxkl, maxkl,
            self.stochpol.aux_loss, self.stochpol.feat_var,
            self.stochpol.max_feat, global_grad_norm
        ]
        # Names are positionally aligned with self._losses.
        self.loss_names = [
            'tot', 'pg', 'vf', 'ent', 'clipfrac', 'approxkl', 'maxkl',
            "auxloss", "featvar", "maxfeat", "gradnorm"
        ]
        self.I = None
        self.disable_policy_update = None
        allvars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES,
                                    scope=self.abs_scope)
        if self.is_log_leader:
            tf_util.display_var_info(allvars)
        tf.get_default_session().run(tf.variables_initializer(allvars))
        sync_from_root(tf.get_default_session(),
                       allvars)  #Syncs initialization across mpi workers.
        self.t0 = time.time()
        self.global_tcount = 0
示例#12
0
 def decode(self, x):
     """Map ``x`` to ``sign(x) * |x| ** (1/8) / 128``.

     The input is cast to float first. The eighth root is taken as three
     successive square roots rather than with a fractional power.
     """
     as_float = tf.to_float(x)
     sign = tf.sign(as_float)
     # we can't use tf.pow(..., 0.125) because of a high-error approximation
     # on TPU.  Instead we sqrt three times.
     root = tf.abs(as_float)
     for _ in range(3):
         root = tf.sqrt(root)
     return sign * (root / 128.0)
示例#13
0
    def __call__(self, vocabs, moving_params=None):
        """Build the arc / relation scoring graph and return its tensors.

        The parent class produces the recurrent representation; this method
        adds MLP projections, a distance-based arc prior, bilinear arc and
        relation scorers, and the corresponding losses and accuracy counts.

        Args:
            vocabs: Forwarded unchanged to the parent ``__call__``. Note that
                the body below reads ``self.vocabs`` (not this argument) for
                the 'heads' and 'rels' vocabularies.
            moving_params: Forwarded to the parent ``__call__``. When it is
                not ``None``, relation logits are selected with the predicted
                heads (``arc_preds``); otherwise with the gold heads
                (``arc_targets``).

        Returns:
            dict mapping names to tensors: arc/rel logits, probabilities,
            predictions, targets, per-token correctness masks, losses and
            aggregate counts, plus the summed ``'loss'``.
        """

        top_recur = super(GamaParser,
                          self).__call__(vocabs, moving_params=moving_params)
        int_tokens_to_keep = tf.to_int32(self.tokens_to_keep)

        with tf.variable_scope('MLP'):
            # One MLP call yields the dependent-side and head-side projections;
            # each is then cut into arc / rel / mu / sigma slices on axis 2.
            dep_mlp, head_mlp = self.MLP(
                top_recur,
                self.arc_mlp_size + self.rel_mlp_size + 2 * self.p_mlp_size,
                n_splits=2)
            arc_dep_mlp, rel_dep_mlp, mu_dep_mlp, sigma_dep_mlp = tf.split(
                dep_mlp, [
                    self.arc_mlp_size, self.rel_mlp_size, self.p_mlp_size,
                    self.p_mlp_size
                ],
                axis=2)
            arc_head_mlp, rel_head_mlp, mu_head_mlp, sigma_head_mlp = tf.split(
                head_mlp, [
                    self.arc_mlp_size, self.rel_mlp_size, self.p_mlp_size,
                    self.p_mlp_size
                ],
                axis=2)

        with tf.variable_scope('dist'):
            with tf.variable_scope('mu'):
                # (n x b x d) o (d x 1 x d) o (n x b x d).T -> (n x b x b)
                # Squared so the predicted mean distance is non-negative.
                arc_mus = self.bilinear(mu_dep_mlp, mu_head_mlp, 1)**2
            with tf.variable_scope('sigma'):
                # (n x b x d) o (d x 1 x d) o (n x b x d).T -> (n x b x b)
                # Squared and offset by .1 so the value used as a divisor
                # below stays strictly positive.
                arc_sigmas = self.bilinear(
                    sigma_dep_mlp, sigma_head_mlp, 1, initializer=None)**2 + .1
            # (b x 1)
            i_mat = tf.expand_dims(tf.range(self.bucket_size), 1)
            # (1 x b)
            j_mat = tf.expand_dims(tf.range(self.bucket_size), 0)
            # (b x 1) - (1 x b) -> (b x b)
            # Absolute position difference |i - j| between every index pair.
            k_mat = tf.to_float(tf.abs(i_mat - j_mat))

            # Log-density of a normal distribution with mean arc_mus and
            # variance arc_sigmas, evaluated at the distance k_mat.
            arc_logits = -.5 * tf.log(2 * np.pi * arc_sigmas) - .5 * (
                k_mat - arc_mus)**2 / arc_sigmas
            # The commented-out lines below are a disabled alternative
            # formulation kept from the original source.
            #arc_rs += tf.to_float(k_mat)#tf.to_float(tf.expand_dims(tf.expand_dims(self.sequence_lengths, 1), 1))
            # (b x 1)
            #n_mat = tf.expand_dims(self.sequence_lengths, 1) - 1 - i_mat
            # (b x b) * (n x b x b) - (n x b x b) - (b x b) -> (n x b x b)
            #arc_logits = (tf.lgamma(arc_rs+1) - tf.lgamma(k_mat) - tf.lgamma(arc_rs-k_mat+2) +
            #               k_mat * tf.log(arc_ps) + (arc_rs-k_mat+1)*tf.log(1-arc_ps) )
        with tf.variable_scope('Arc'):
            # (n x b x d) o (d x 1 x d) o (n x b x d).T -> (n x b x b)
            # The bilinear arc score is added on top of the distance term.
            arc_logits += self.bilinear(arc_dep_mlp,
                                        arc_head_mlp,
                                        1,
                                        add_bias2=False)
            # (n x b x b)
            arc_probs = tf.nn.softmax(arc_logits)
            # (n x b)
            arc_preds = tf.to_int32(tf.argmax(arc_logits, axis=-1))
            # (n x b)
            arc_targets = self.vocabs['heads'].placeholder
            # (n x b)
            # Multiplying by the keep mask zeroes out tokens that should not
            # count towards accuracy.
            arc_correct = tf.to_int32(tf.equal(
                arc_preds, arc_targets)) * int_tokens_to_keep
            # ()
            arc_loss = tf.losses.sparse_softmax_cross_entropy(
                arc_targets, arc_logits, self.tokens_to_keep)

        with tf.variable_scope('Rel'):
            # (n x b x d) o (d x r x d) o (n x b x d).T -> (n x b x r x b)
            rel_logits = self.bilinear(rel_dep_mlp, rel_head_mlp,
                                       len(self.vocabs['rels']))
            # (n x b x r x b)
            rel_probs = tf.nn.softmax(rel_logits, dim=2)
            # (n x b x b)
            # Predicted heads are used when moving_params is given, gold
            # heads otherwise.
            one_hot = tf.one_hot(
                arc_preds if moving_params is not None else arc_targets,
                self.bucket_size)
            # (n x b x b) -> (n x b x b x 1)
            one_hot = tf.expand_dims(one_hot, axis=3)
            # (n x b x r x b) o (n x b x b x 1) -> (n x b x r x 1)
            # The one-hot matmul picks, per token, the relation scores of the
            # selected head.
            select_rel_logits = tf.matmul(rel_logits, one_hot)
            # (n x b x r x 1) -> (n x b x r)
            select_rel_logits = tf.squeeze(select_rel_logits, axis=3)
            # (n x b)
            rel_preds = tf.to_int32(tf.argmax(select_rel_logits, axis=-1))
            # (n x b)
            rel_targets = self.vocabs['rels'].placeholder
            # (n x b)
            rel_correct = tf.to_int32(tf.equal(
                rel_preds, rel_targets)) * int_tokens_to_keep
            # ()
            rel_loss = tf.losses.sparse_softmax_cross_entropy(
                rel_targets, select_rel_logits, self.tokens_to_keep)

        n_arc_correct = tf.reduce_sum(arc_correct)
        n_rel_correct = tf.reduce_sum(rel_correct)
        # A token counts as fully correct only if both its head and its
        # relation are right.
        correct = arc_correct * rel_correct
        n_correct = tf.reduce_sum(correct)
        # A sequence is correct when its number of fully correct tokens
        # equals sequence_lengths - 1.
        n_seqs_correct = tf.reduce_sum(
            tf.to_int32(
                tf.equal(tf.reduce_sum(correct, axis=1),
                         self.sequence_lengths - 1)))
        loss = arc_loss + rel_loss

        outputs = {
            'arc_logits': arc_logits,
            'arc_mus': arc_mus,
            'arc_sigmas': arc_sigmas,
            'arc_probs': arc_probs,
            'arc_preds': arc_preds,
            'arc_targets': arc_targets,
            'arc_correct': arc_correct,
            'arc_loss': arc_loss,
            'n_arc_correct': n_arc_correct,
            'rel_logits': rel_logits,
            'rel_probs': rel_probs,
            'rel_preds': rel_preds,
            'rel_targets': rel_targets,
            'rel_correct': rel_correct,
            'rel_loss': rel_loss,
            'n_rel_correct': n_rel_correct,
            'n_tokens': self.n_tokens,
            'n_seqs': self.batch_size,
            'tokens_to_keep': self.tokens_to_keep,
            'n_correct': n_correct,
            'n_seqs_correct': n_seqs_correct,
            'loss': loss
        }

        return outputs
 def apply_gradient_clipping(gradient):
     """Clamp each gradient element's magnitude to [0.1, 1.0], keeping its sign.

     Args:
         gradient: A gradient tensor, or ``None`` (as optimizers return for
             variables that do not influence the loss).

     Returns:
         ``clip(|gradient|, 0.1, 1.0) * sign(gradient)``, or ``None`` when
         ``gradient`` is ``None``. Elements that are exactly zero stay zero
         because their sign is zero.
     """
     if gradient is None:
         return None
     # The original body referenced an undefined name ``grad`` and the
     # long-removed ``tf.mul``; the ``*`` operator works on every TF version.
     clipped_magnitude = tf.clip_by_value(tf.abs(gradient), 0.1, 1.)
     return clipped_magnitude * tf.sign(gradient)
示例#15
0
    def start_interaction(self, env_fns, dynamics, nlump=2):
        """Create the train op, initialise variables and set up rollouts.

        When ``self.agent_num`` is ``None`` an optimizer is built and the
        resulting train op (and optional summary op) is stored in graph
        collections; otherwise the ops are fetched back from the
        ``"train_op"`` / ``"summary_op"`` collections.

        Args:
            env_fns: Sequence of environment constructors. It is sliced into
                ``nlump`` consecutive groups of ``len(env_fns) // nlump``,
                each wrapped in one ``VecEnv``.
            dynamics: Passed through to ``Rollout``.
            nlump: Number of ``VecEnv`` groups.

        Raises:
            ValueError: If a single-process run requests an optimizer other
                than ``'adam'``, ``'sgd'`` or ``'momentum'``.
        """
        self.loss_names, self._losses = zip(*list(self.to_report.items()))
        self.global_step = tf.Variable(0, trainable=False)
        params = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)

        if self.agent_num is None:
            # Multi-process runs always use the MPI-aware Adam optimizer;
            # ``self.optim`` only matters for single-process runs.
            if MPI.COMM_WORLD.Get_size() > 1:
                trainer = MpiAdamOptimizer(learning_rate=self.ph_lr,
                                           comm=MPI.COMM_WORLD)
            elif self.optim == 'adam':
                trainer = tf.train.AdamOptimizer(learning_rate=self.ph_lr)
            elif self.optim == 'sgd':
                print("using sgd")
                print("________________________")
                if self.decay:
                    self.decay_lr = tf.train.exponential_decay(
                        self.ph_lr,
                        self.global_step,
                        2500,
                        .96,
                        staircase=True)
                    trainer = tf.train.GradientDescentOptimizer(
                        learning_rate=self.decay_lr)
                else:
                    trainer = tf.train.GradientDescentOptimizer(
                        learning_rate=self.ph_lr)
            elif self.optim == 'momentum':
                print('using momentum')
                print('________________________')
                trainer = tf.train.MomentumOptimizer(
                    learning_rate=self.ph_lr, momentum=0.9)
            else:
                # Previously this fell through and failed later with an
                # unbound ``trainer``; fail early with a clear message.
                raise ValueError(
                    "unsupported optimizer %r; expected 'adam', 'sgd' or "
                    "'momentum'" % (self.optim,))

            gradsandvars = trainer.compute_gradients(self.total_loss, params)

            def l2_norm(t):
                return tf.sqrt(tf.reduce_sum(tf.pow(t, 2)))

            if self.log_grads:
                for grad, var in gradsandvars:
                    # compute_gradients yields None for variables the loss
                    # does not depend on; they have no gradient to log.
                    if grad is not None:
                        tf.summary.histogram(var.name + '/gradient',
                                             l2_norm(grad))
                    tf.summary.histogram(var.name + '/value', l2_norm(var))
                    if grad is not None:
                        grad_mean = tf.reduce_mean(tf.abs(grad))
                        tf.summary.scalar(var.name + '/grad_mean', grad_mean)
                if self.decay:
                    # The decayed rate only exists on the plain-SGD path, so
                    # log it only when it was actually created.
                    decay_lr = getattr(self, 'decay_lr', None)
                    if decay_lr is not None:
                        tf.summary.scalar('decay_lr', decay_lr)
                self._summary = tf.summary.merge_all()
                tf.add_to_collection("summary_op", self._summary)
            if self.grad_clip > 0:
                grads, gradvars = zip(*gradsandvars)
                grads, _ = tf.clip_by_global_norm(grads, self.grad_clip)
                gradsandvars = list(zip(grads, gradvars))

            self._train = trainer.apply_gradients(gradsandvars,
                                                  global_step=self.global_step)
            # Run any registered update ops together with the train step.
            self._updates = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
            self._train = tf.group(self._train, self._updates)
            tf.add_to_collection("train_op", self._train)
        else:
            self._train = tf.get_collection("train_op")[0]
            if self.log_grads:
                self._summary = tf.get_collection("summary_op")[0]

        # Rank 0 initialises the variables, then every rank receives them
        # through the broadcast.
        if MPI.COMM_WORLD.Get_rank() == 0:
            getsess().run(
                tf.variables_initializer(
                    tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)))
        bcast_tf_vars_from_root(
            getsess(), tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES))

        self.all_visited_rooms = []
        self.all_scores = []
        self.nenvs = nenvs = len(env_fns)
        self.nlump = nlump
        self.lump_stride = nenvs // self.nlump
        self.envs = [
            VecEnv(env_fns[lump * self.lump_stride:
                           (lump + 1) * self.lump_stride],
                   spaces=[self.env_ob_space, self.ac_space])
            for lump in range(self.nlump)
        ]

        self.rollout = Rollout(ob_space=self.ob_space,
                               ac_space=self.ac_space,
                               nenvs=nenvs,
                               nsteps_per_seg=self.nsteps_per_seg,
                               nsegs_per_env=self.nsegs_per_env,
                               nlumps=self.nlump,
                               envs=self.envs,
                               policy=self.stochpol,
                               int_rew_coeff=self.int_coeff,
                               ext_rew_coeff=self.ext_coeff,
                               record_rollouts=self.use_recorder,
                               dynamics=dynamics,
                               exp_name=self.exp_name,
                               env_name=self.env_name,
                               video_log_freq=self.video_log_freq,
                               model_save_freq=self.model_save_freq,
                               use_apples=self.use_apples,
                               multi_envs=self.multi_envs,
                               lstm=self.lstm,
                               lstm1_size=self.lstm1_size,
                               lstm2_size=self.lstm2_size,
                               depth_pred=self.depth_pred,
                               early_stop=self.early_stop,
                               aux_input=self.aux_input)

        self.buf_advs = np.zeros((nenvs, self.rollout.nsteps), np.float32)
        self.buf_rets = np.zeros((nenvs, self.rollout.nsteps), np.float32)

        if self.normrew:
            self.rff = RewardForwardFilter(self.gamma)
            self.rff_rms = RunningMeanStd()

        self.step_count = 0
        self.t_last_update = time.time()
        self.t_start = time.time()
示例#16
0
 def diff(a, b):
     """Return the mean absolute difference between ``a`` and ``b``."""
     abs_delta = tf.abs(a - b)
     return tf.reduce_mean(abs_delta)
示例#17
0
    def __init__(self,
                 *,
                 hps,
                 scope,
                 ob_space,
                 env_ob_space,
                 ac_space,
                 stochpol,
                 ent_coef,
                 gamma,
                 lam,
                 nepochs,
                 lr,
                 cliprange,
                 nminibatches,
                 normrew,
                 normadv,
                 use_news,
                 ext_coeff,
                 int_coeff,
                 nsteps_per_seg,
                 nsegs_per_env,
                 dynamics,
                 exp_name,
                 env_name,
                 video_log_freq,
                 model_save_freq,
                 use_apples,
                 agent_num=None,
                 restore_name=None,
                 multi_envs=None,
                 lstm=False,
                 lstm1_size=512,
                 lstm2_size=0,
                 depth_pred=0,
                 beta_d=.1,
                 early_stop=0,
                 aux_input=0,
                 optim='adam',
                 decay=0,
                 grad_clip=0.0,
                 log_grads=0,
                 logdir='logs'):
        """Store hyperparameters and build (or restore) the loss graph.

        All arguments are keyword-only. Most are simply stored on ``self``
        under the same name (``ent_coef`` is stored as ``self.ent_coeff``).
        ``hps`` and ``restore_name`` are accepted but not referenced in this
        method's body.

        When ``agent_num`` is ``None`` the placeholders and a PPO-style
        clipped-surrogate loss are created inside ``scope`` and registered in
        named graph collections; otherwise ``self.restore()`` is called
        instead.

        Notable arguments:
            stochpol: Policy object; this method reads its ``pd``, ``ph_ac``,
                ``vpred`` and (when ``depth_pred`` is truthy) ``depth_loss``.
            ent_coef: Weight of the entropy bonus in the total loss.
            depth_pred: When truthy, ``stochpol.depth_loss * beta_d`` is added
                to the total loss and reported.
            log_grads: When truthy, a summary ``FileWriter`` is opened at
                ``logdir + '/grads/' + exp_name``.
        """
        self.dynamics = dynamics
        self.exp_name = exp_name
        self.env_name = env_name
        self.video_log_freq = video_log_freq
        self.model_save_freq = model_save_freq
        self.use_apples = use_apples
        self.agent_num = agent_num
        self.multi_envs = multi_envs
        self.lstm = lstm
        self.lstm1_size = lstm1_size
        self.lstm2_size = lstm2_size
        self.depth_pred = depth_pred
        self.aux_input = aux_input
        self.early_stop = early_stop
        self.optim = optim
        self.decay = decay
        self.log_grads = log_grads
        self.grad_clip = grad_clip
        if log_grads:
            self.grad_writer = tf.summary.FileWriter(logdir + '/grads/' +
                                                     exp_name)
        with tf.variable_scope(scope):
            self.use_recorder = True
            self.n_updates = 0
            self.scope = scope
            self.ob_space = ob_space
            self.env_ob_space = env_ob_space
            self.ac_space = ac_space
            self.stochpol = stochpol
            self.nepochs = nepochs
            self.lr = lr
            self.cliprange = cliprange
            self.nsteps_per_seg = nsteps_per_seg
            self.nsegs_per_env = nsegs_per_env
            self.nminibatches = nminibatches
            self.gamma = gamma
            self.lam = lam
            self.normrew = normrew
            self.normadv = normadv
            self.use_news = use_news
            self.ext_coeff = ext_coeff
            self.int_coeff = int_coeff
            self.ent_coeff = ent_coef
            self.beta_d = beta_d

            def mask(target, mask):
                """Return ``target`` with gradients blocked where ``mask`` is 0.

                The forward value is ``|mask - 1| * target + mask * target``;
                only the second term carries gradient. Assumes ``mask`` holds
                0/1 values, in which case the value equals ``target`` --
                TODO confirm against the feeder of ``ph_gradmask``.
                """
                mask_h = tf.abs(mask - 1)
                return tf.stop_gradient(mask_h * target) + mask * target

            if self.agent_num is None:
                self.ph_adv = tf.placeholder(tf.float32, [None, None],
                                             name='adv')
                self.ph_ret = tf.placeholder(tf.float32, [None, None],
                                             name='ret')
                self.ph_rews = tf.placeholder(tf.float32, [None, None],
                                              name='rews')
                self.ph_oldnlp = tf.placeholder(tf.float32, [None, None],
                                                name='oldnlp')
                self.ph_oldvpred = tf.placeholder(tf.float32, [None, None],
                                                  name='oldvpred')
                self.ph_lr = tf.placeholder(tf.float32, [], name='lr')
                self.ph_cliprange = tf.placeholder(tf.float32, [],
                                                   name='cliprange')
                self.ph_gradmask = tf.placeholder(tf.float32, [None, None],
                                                  name='gradmask')
                # Negative log-probability of the taken actions, with
                # gradients gated by the gradient mask.
                neglogpac = mask(self.stochpol.pd.neglogp(self.stochpol.ph_ac),
                                 self.ph_gradmask)
                entropy = tf.reduce_mean(self.stochpol.pd.entropy(),
                                         name='agent_entropy')
                vpred = mask(self.stochpol.vpred, self.ph_gradmask)
                vf_loss = 0.5 * tf.reduce_mean(
                    (vpred - mask(self.ph_ret, self.ph_gradmask))**2,
                    name='vf_loss')
                ratio = tf.exp(self.ph_oldnlp - neglogpac,
                               name='ratio')  # p_new / p_old
                negadv = -mask(self.ph_adv, self.ph_gradmask)
                # Clipped surrogate: element-wise maximum of the unclipped
                # and the ratio-clipped negative-advantage terms.
                pg_losses1 = negadv * ratio
                pg_losses2 = negadv * tf.clip_by_value(ratio,
                                                       1.0 - self.ph_cliprange,
                                                       1.0 + self.ph_cliprange,
                                                       name='pglosses2')
                pg_loss_surr = tf.maximum(pg_losses1,
                                          pg_losses2,
                                          name='loss_surr')
                pg_loss = tf.reduce_mean(pg_loss_surr, name='pg_loss')
                ent_loss = (-ent_coef) * entropy
                if self.depth_pred:
                    depth_loss = self.stochpol.depth_loss * beta_d
                approxkl = .5 * tf.reduce_mean(
                    tf.square(neglogpac - self.ph_oldnlp), name='approxkl')
                # Fraction of elements where the surrogate differs from the
                # clipped term by more than 1e-6.
                clipfrac = tf.reduce_mean(
                    tf.to_float(tf.abs(pg_losses2 - pg_loss_surr) > 1e-6),
                    name='clipfrac')

                self.total_loss = pg_loss + ent_loss + vf_loss
                if self.depth_pred:
                    self.total_loss = self.total_loss + depth_loss
                    #self.total_loss = depth_loss
                    #print("adding depth loss to total loss for optimization")
                #self.total_loss = depth_loss
                # Scalars exposed for reporting, keyed by short loss name.
                self.to_report = {
                    'tot': self.total_loss,
                    'pg': pg_loss,
                    'vf': vf_loss,
                    'ent': entropy,
                    'approxkl': approxkl,
                    'clipfrac': clipfrac
                }
                if self.depth_pred:
                    self.to_report.update({'depth_loss': depth_loss})
                # Register placeholders and loss tensors in named graph
                # collections so they can be looked up by name later.
                tf.add_to_collection('adv', self.ph_adv)
                tf.add_to_collection('ret', self.ph_ret)
                tf.add_to_collection('rews', self.ph_rews)
                tf.add_to_collection('oldnlp', self.ph_oldnlp)
                tf.add_to_collection('oldvpred', self.ph_oldvpred)
                tf.add_to_collection('lr', self.ph_lr)
                tf.add_to_collection('cliprange', self.ph_cliprange)
                tf.add_to_collection('agent_entropy', entropy)
                tf.add_to_collection('vf_loss', vf_loss)
                tf.add_to_collection('ratio', ratio)
                tf.add_to_collection('pg_losses2', pg_losses2)
                tf.add_to_collection('loss_surr', pg_loss_surr)
                tf.add_to_collection('pg_loss', pg_loss)
                if self.depth_pred:
                    tf.add_to_collection('depth_loss', depth_loss)
                tf.add_to_collection('approxkl', approxkl)
                tf.add_to_collection('clipfrac', clipfrac)
            else:
                # A non-None agent_num skips graph construction entirely.
                self.restore()
示例#18
0
def model_creation(neurons, nb_features, nb_targets):
    """Build a fully connected sigmoid network with an Adam training op.

    Args:
        neurons: Sequence giving the width of each hidden layer; must hold
            at least one entry.
        nb_features: Number of input features (columns of ``X``).
        nb_targets: Number of target values (columns of ``Y``).

    Returns:
        Tuple ``(X, Y, sess, opt, mse, layers_dict)`` where ``X`` / ``Y`` are
        the input and target placeholders, ``sess`` an initialised
        ``tf.InteractiveSession``, ``opt`` the Adam minimise op, ``mse`` the
        loss tensor (the square root of the mean squared error, despite its
        name) and ``layers_dict`` maps layer names to variables and tensors.
        ``layers_dict["output_layer"]`` has shape ``[nb_targets, batch]``.

    Raises:
        ValueError: If ``neurons`` is empty.
    """
    # Validate before opening the session so that invalid input does not
    # leave a dangling InteractiveSession behind.
    if len(neurons) < 1:
        raise ValueError("You must have at least one hidden layer")

    # Session
    sess = tf.InteractiveSession()

    # Placeholders
    X = tf.placeholder(tf.float32, shape=[None, nb_features])
    Y = tf.placeholder(tf.float32, shape=[None, nb_targets])

    weight_initializer = tf.variance_scaling_initializer(
        mode="fan_avg", distribution="uniform", scale=1)
    bias_initializer = tf.zeros_initializer()
    layers_dict = {}

    # Hidden weights and biases; each layer's fan-in is the previous layer's
    # width (the feature count for the first layer).
    fan_in = nb_features
    for idx, width in enumerate(neurons):
        layers_dict["weight_hidden_" + str(idx)] = tf.Variable(
            weight_initializer([fan_in, width]))
        layers_dict["bias_hidden_" + str(idx)] = tf.Variable(
            bias_initializer([width]))
        fan_in = width

    # Output weights and bias
    layers_dict["weight_out"] = tf.Variable(
        weight_initializer([neurons[-1], nb_targets]))
    layers_dict["bias_out"] = tf.Variable(bias_initializer([nb_targets]))

    # Hidden layers: sigmoid(previous_activation @ W + b)
    activation = X
    for idx in range(len(neurons)):
        pre_activation = tf.add(
            tf.matmul(activation, layers_dict["weight_hidden_" + str(idx)]),
            layers_dict["bias_hidden_" + str(idx)])
        activation = tf.sigmoid(pre_activation)
        layers_dict["hidden_layer_" + str(idx)] = activation

    # Output layer: absolute value of the transposed affine output, so its
    # shape is [nb_targets, batch].
    layers_dict["output_layer"] = tf.abs(tf.transpose(
        tf.add(tf.matmul(activation, layers_dict["weight_out"]),
               layers_dict["bias_out"])),
                                         name="output_layer")

    # Cost function. The output is [nb_targets, batch] while Y is
    # [batch, nb_targets]; Y is transposed so the difference is taken
    # element-wise instead of broadcasting the two against each other.
    mse = tf.sqrt(
        tf.reduce_mean(
            tf.squared_difference(layers_dict["output_layer"],
                                  tf.transpose(Y))))

    # Optimizer
    opt = tf.train.AdamOptimizer(0.001).minimize(mse)

    # Init
    sess.run(tf.global_variables_initializer())

    return (X, Y, sess, opt, mse, layers_dict)
示例#19
0
def sym_exp_sigmoid(x, width=8.0):
    """Apply exp_sigmoid to ``width * (|x| / 2 - 1)``.

    Because only ``|x|`` is used, the result is symmetric about ``x = 0``
    (the original note describes it as centered at (0, 1e-7)).
    """
    magnitude = tf.abs(tf_float32(x))
    shifted = magnitude / 2.0 - 1.0
    return exp_sigmoid(width * shifted)
def rgbd_consistency_loss(frame1transformed_depth,
                          frame1rgb,
                          frame2depth,
                          frame2rgb,
                          validity_mask=None):
  """Penalizes RGBD disagreement between frame 2 and motion-warped frame 1.

  Three losses are produced: depth, RGB and structural similarity. The
  function is NOT symmetric in the two frames. To cope with occlusions, depth
  and RGB mismatches are only penalized at pixels where the warped frame 1 is
  closer to the camera than frame 2 (see https://arxiv.org/abs/1904.04998), so
  the intended usage is to call it twice, the second time with the frames
  swapped.

  Args:
    frame1transformed_depth: A transform_depth_map.TransformedDepthMap object
      holding the depth map of frame 1 after it was motion-transformed to
      frame 2 (accounting for all camera and object motion between the two
      frames). Its tensors are of shape [B, H, W].
    frame1rgb: A tf.Tensor of shape [B, H, W, C], the RGB image at frame 1.
    frame2depth: A tf.Tensor of shape [B, H, W], the depth map at frame 2.
    frame2rgb: A tf.Tensor of shape [B, H, W, C], the RGB image at frame 2.
    validity_mask: Optional floating point tf.Tensor of shape [B, H, W, 1]
      containing a validity mask.

  Returns:
    A dictionary from string to tf.Tensor, with the following entries:
      depth_error: A tf scalar, the depth mismatch error between the frames.
      rgb_error: A tf scalar, the rgb mismatch error between the frames.
      ssim_error: A tf scalar, the structural similarity mismatch error
        between the frames.
      depth_proximity_weight: A tf.Tensor of shape [B, H, W], a function that
        peaks (at 1.0) for pixels where the two frames agree on depth and is
        small otherwise.
      frame1_closer_to_camera: A float tf.Tensor that is 1.0 where the warped
        depth of frame 1 is valid and smaller than the depth of frame 2.
  """
  warped = frame1transformed_depth
  use_validity = validity_mask is not None

  # Resample frame 2's RGB and depth together at the pixel locations that
  # frame 1 was warped to, then separate them again. This yields frame 2's
  # actual depth at the locations where `warped.depth` is the predicted one.
  rgbd2 = tf.concat([frame2rgb, tf.expand_dims(frame2depth, -1)], axis=-1)
  rgbd2_at_warp = resampler.resampler_with_unstacked_warp(
      rgbd2, warped.pixel_x, warped.pixel_y, safe=False)
  rgb2_at_warp, depth2_at_warp = tf.split(rgbd2_at_warp, [3, 1], axis=-1)
  depth2_at_warp = tf.squeeze(depth2_at_warp, axis=-1)

  # Only pixels where the warped frame-1 depth lies in front of frame 2's
  # depth are penalized, so that points which become occluded by the
  # transform are not. The opposite case (frame 1 FARTHER than frame 2) is
  # covered when the caller swaps the frames' roles
  # (more in https://arxiv.org/abs/1904.04998).
  closer_mask = tf.to_float(
      tf.logical_and(warped.mask, tf.less(warped.depth, depth2_at_warp)))

  # Depth term: masked mean absolute depth difference.
  depth_l1 = tf.abs(depth2_at_warp - warped.depth)
  if use_validity:
    depth_l1 = depth_l1 * tf.squeeze(validity_mask, axis=[3])
  depth_error = tf.reduce_mean(
      tf.math.multiply_no_nan(depth_l1, closer_mask))

  # RGB term: masked mean absolute color difference.
  rgb_l1 = tf.abs(rgb2_at_warp - frame1rgb)
  if use_validity:
    rgb_l1 = rgb_l1 * validity_mask
  rgb_error = tf.reduce_mean(
      tf.math.multiply_no_nan(rgb_l1, tf.expand_dims(closer_mask, -1)))

  # Weight that peaks (at 1.0) where the depth difference is small compared
  # with its spread across the frame and falls off to zero otherwise. It is
  # used below to weigh the structural similarity term, which should only be
  # demanded for surfaces that are close to one another in the two frames.
  depth_sq_diff = tf.square(depth2_at_warp - warped.depth)
  second_moment = _weighted_average(depth_sq_diff, closer_mask) + 1e-4
  depth_proximity_weight = tf.math.multiply_no_nan(
      second_moment / (depth_sq_diff + second_moment),
      tf.to_float(warped.mask))
  if use_validity:
    depth_proximity_weight = depth_proximity_weight * tf.squeeze(
        validity_mask, axis=[3])

  # If we don't stop the gradient training won't start. The reason is
  # presumably that then the network can push the depths apart instead of
  # seeking RGB consistency.
  depth_proximity_weight = tf.stop_gradient(depth_proximity_weight)

  # These values of c1 and c2 seemed to work better than the defaults.
  # TODO(gariel): Make them parameters rather than hard coded.
  ssim_error, avg_weight = weighted_ssim(
      rgb2_at_warp,
      frame1rgb,
      depth_proximity_weight,
      c1=float('inf'),
      c2=9e-6)
  ssim_error_mean = tf.reduce_mean(
      tf.math.multiply_no_nan(ssim_error, avg_weight))

  return {
      'depth_error': depth_error,
      'rgb_error': rgb_error,
      'ssim_error': ssim_error_mean,
      'depth_proximity_weight': depth_proximity_weight,
      'frame1_closer_to_camera': closer_mask
  }
示例#21
0
def soft_relu(x):
  """Return log(1 + exp(x)) element-wise.

  The value is assembled as log(1 + exp(-|x|)) + max(x, 0), so `tf.exp` is
  only ever called with a non-positive argument.
  """
  # Handy identities:
  #   log(sigmoid(x))     = x - soft_relu(x) = -soft_relu(-x)
  #   log(1 - sigmoid(x)) = -soft_relu(x)
  decayed = tf.exp(-tf.abs(x))
  log_term = tf.log(1.0 + decayed)
  relu_term = tf.maximum(x, 0.0)
  return log_term + relu_term
  def _finish(self, state):
    """Builds the per-variable update ops and returns them as one grouped op.

    For every (variable, gradient) pair this updates the optimizer slots
    (running maximum gradient norm, sum of squared gradients, gradient
    estimate, sum of squared estimates, previous iterate) and finally moves
    the variable by `-eta * grad_estimate`.

    Args:
      state: optimizer state object; used here for `get_slot` lookups and
        passed to `self._recompute_gradients`.

    Returns:
      A `tf.group` of all the assign ops created below.
    """

    update_ops = []

    # NOTE(review): presumably the gradients of the current loss evaluated at
    # the previous iterate -- confirm against `_recompute_gradients`.
    grads_at_prev_iterate = self._recompute_gradients(state)

    for var, grad, grad_at_prev_iterate in zip(self.vars, self.grads,
                                               grads_at_prev_iterate):
      sum_grad_squared = state.get_slot(var, SUM_GRAD_SQUARED)
      previous_iterate = state.get_slot(var, PREVIOUS_ITERATE)
      maximum_gradient = state.get_slot(var, MAXIMUM_GRADIENT)
      sum_estimates_squared = state.get_slot(var, SUM_ESTIMATES_SQUARED)

      # Running maximum of the gradient norm; reused below as the clipping
      # bound for the gradient estimate.
      maximum_gradient_updated = tf.assign(
          maximum_gradient, tf.maximum(maximum_gradient, tf.norm(grad)))
      update_ops.append(maximum_gradient_updated)

      # Element-wise accumulation of squared gradients.
      sum_grad_squared_updated = tf.assign_add(sum_grad_squared,
                                               tf.pow(tf.abs(grad), 2.0))
      update_ops.append(sum_grad_squared_updated)

      # Ratio of gradient change to iterate change; only consumed by the
      # summary below. The 0.0001 keeps the denominator away from zero.
      smoothness = tf.norm(grad - grad_at_prev_iterate) / (
          0.0001 + tf.norm(var - previous_iterate))
      # Step size: lr * (eta + sum of squared gradients)^(-1/3), element-wise.
      eta = self.lr * tf.pow(self.eta + sum_grad_squared_updated, -1.0 / 3.0)

      # Mixing weight, capped at 1.
      beta = tf.minimum(1.0, self.momentum * tf.square(eta))

      grad_estimate = state.get_slot(var, GRAD_ESTIMATE)

      # New estimate = current gradient plus a (1 - beta)-weighted correction
      # from the old estimate, then clipped element-wise to
      # [-max_grad, +max_grad].
      new_grad_estimate = grad + (1.0 - beta) * (
          grad_estimate - grad_at_prev_iterate)
      new_grad_estimate = tf.clip_by_value(new_grad_estimate,
                                           -maximum_gradient_updated,
                                           maximum_gradient_updated)

      if self.output_summaries:
        tf.summary.scalar(self._name + "/smoothness/" + var.name, smoothness)
        tf.summary.scalar(self._name + "/max_grad/" + var.name,
                          maximum_gradient_updated)
        tf.summary.scalar(self._name + "/average_beta/" + var.name,
                          tf.reduce_mean(beta))
        tf.summary.scalar(self._name + "/iterate_diff/" + var.name,
                          tf.norm(var - previous_iterate))
        tf.summary.scalar(self._name + "/grad_diff/" + var.name,
                          tf.norm(grad - grad_at_prev_iterate))
        tf.summary.scalar(self._name + "/vr_grad_estimate_norm/" + var.name,
                          tf.norm(new_grad_estimate))
        tf.summary.scalar(self._name + "/grad_norm/" + var.name, tf.norm(grad))

      grad_estimate_updated = tf.assign(grad_estimate, new_grad_estimate)
      update_ops.append(grad_estimate_updated)

      sum_estimates_squared_updated = tf.assign_add(
          sum_estimates_squared, tf.square(new_grad_estimate))
      update_ops.append(sum_estimates_squared_updated)

      # Only overwrite the stored previous iterate once the gradient at the
      # previous iterate has been computed.
      with tf.control_dependencies([grad_at_prev_iterate]):
        previous_iterate_updated = tf.assign(previous_iterate, var)
        update_ops.append(previous_iterate_updated)

      step = -eta * grad_estimate_updated

      # Move the variable only after its current value has been saved into
      # the previous-iterate slot.
      with tf.control_dependencies([previous_iterate_updated]):
        var_updated = tf.assign_add(var, step)
        update_ops.append(var_updated)

    return tf.group(*update_ops)
示例#23
0
def advantage_activation_sqrt(x):
    """Apply a sign-preserving square-root squashing to `x`, element-wise.

    Computes sign(x) * (sqrt(|x| + alpha^2) - alpha) with alpha = 0.01.
    """
    alpha = 0.01
    magnitude = tf.sqrt(tf.abs(x) + alpha**2) - alpha
    return tf.sign(x) * magnitude
    def next_timestep(self, state, action):
        '''Advance the batched vessel state by one step of `self.train_dt`.

        The first dimension of every tensor is the batch size
        (`self.batch_size`), so all samples are integrated in parallel.
        Both the velocity and the pose are advanced with a single explicit
        Euler step; the pose step uses the already-updated velocity.

        Params
        ======
            state: rank-2 tensor; columns 0:3 are read as the pose `eta`
                (column 2 is used as the heading angle) and columns 3:6 as
                the velocity `upsilon`, which is rotated by the heading to
                obtain the pose rate
            action: rank-2 tensor; column 0 is `Tport` and column 1 is
                `Tstbd` (presumably port / starboard thrust -- confirm with
                the caller)
        Returns
        ======
            (next_state, reward): `next_state` is a rank-2 tensor
            [batch_size, 6] laid out like `state`; `reward` is
            `self.get_reward(next_state)`
        '''
        physics = self.boat.physics
        batch = self.batch_size

        eta = state[:, 0:3]
        upsilon = state[:, 3:6]
        # Individual velocity components, reused throughout the model terms.
        u = upsilon[:, 0]
        v = upsilon[:, 1]
        r = upsilon[:, 2]
        Tport = action[:, 0]
        Tstbd = action[:, 1]

        zeros = tf.zeros([batch], dtype=tf.float32)
        ones = tf.ones([batch], dtype=tf.float32)

        def _filled(value):
            # Per-sample constant vector of length `batch`.
            return tf.constant(value, dtype=tf.float32, shape=[batch])

        # Surge damping coefficients switch at |u| = 1.2.
        low_speed = tf.less(tf.abs(u), 1.2)
        Xu = tf.where(low_speed, _filled(-0.25), _filled(64.55))
        Xuu = tf.where(low_speed, _filled(0.0), _filled(-70.92))

        # Magnitude of the (u, v) velocity, shared by Yr, Nv and Nr.
        planar_speed = tf.sqrt(tf.pow(u, 2) + tf.pow(v, 2))

        Yv = 0.5*(-40*1000*tf.abs(v)) * \
            (1.1+0.0045*(1.01/0.09) - 0.1*(0.27/0.09)+0.016*(tf.pow((0.27/0.09), 2)))
        Yr = 6*(-3.141592*1000) * planar_speed*0.09*0.09*1.01
        Nv = 0.06*(-3.141592*1000) * planar_speed*0.09*0.09*1.01
        Nr = 0.02*(-3.141592*1000) * planar_speed*0.09*0.09*1.01*1.01

        # Constant 3x3 inertia matrix built from the physics parameters.
        M = tf.constant([
            [physics.m - physics.X_u_dot, 0, 0],
            [0, physics.m - physics.Y_v_dot, 0 - physics.Y_r_dot],
            [0, 0 - physics.N_v_dot, physics.Iz - physics.N_r_dot],
        ])

        # Generalised force from the two thrust commands, as a column vector
        # per sample.
        T = tf.stack([
            Tport + physics.c * Tstbd,
            zeros,
            0.5 * physics.B * (Tport - physics.c * Tstbd),
        ], axis=1)
        T = tf.reshape(T, [batch, 3, 1])

        # The 3x3 matrices below are stacked as [3, 3, batch] and then
        # transposed to [batch, 3, 3] for batched matmul.
        CRB = tf.stack([
            [zeros, zeros, -physics.m * v],
            [zeros, zeros, physics.m * u],
            [physics.m * v, -physics.m * u, zeros],
        ])
        CRB = tf.transpose(CRB, perm=[2, 0, 1])

        yr_nv_mean = (physics.Y_r_dot + physics.N_v_dot) / 2
        CA = tf.stack([
            [zeros, zeros, 2 * ((physics.Y_v_dot * v) + yr_nv_mean * r)],
            [zeros, zeros, -physics.X_u_dot * physics.m * u],
            [
                2 * (((-physics.Y_v_dot) * v) - yr_nv_mean * r),
                physics.X_u_dot * physics.m * u,
                zeros,
            ],
        ])
        CA = tf.transpose(CA, perm=[2, 0, 1])

        C = CRB + CA

        Dl = tf.stack([
            [-Xu, zeros, zeros],
            [zeros, -Yv, -Yr],
            [zeros, -Nv, -Nr],
        ])
        Dl = tf.transpose(Dl, perm=[2, 0, 1])

        abs_v = tf.abs(v)
        abs_r = tf.abs(r)
        Dn = tf.stack([
            [Xuu * tf.abs(u), zeros, zeros],
            [
                zeros,
                physics.Yvv * abs_v + physics.Yvr * abs_r,
                physics.Yrv * abs_v + physics.Yrr * abs_r,
            ],
            [
                zeros,
                physics.Nvv * abs_v + physics.Nvr * abs_r,
                physics.Nrv * abs_v + physics.Nrr * abs_r,
            ],
        ])
        Dn = tf.transpose(Dn, perm=[2, 0, 1])

        D = Dl - Dn

        # upsilon_dot = M^-1 (T - C upsilon - D upsilon)
        upsilon = tf.reshape(upsilon, [batch, 3, 1])
        upsilon_dot = tf.matmul(
            tf.linalg.inv(M),
            (T - tf.matmul(C, upsilon) - tf.matmul(D, upsilon)))

        upsilon = (self.train_dt) * upsilon_dot + upsilon  # Euler step

        # Planar rotation by the heading eta[:, 2].
        heading = eta[:, 2]
        J = tf.stack([
            [tf.cos(heading), -tf.sin(heading), zeros],
            [tf.sin(heading), tf.cos(heading), zeros],
            [zeros, zeros, ones],
        ])
        J = tf.transpose(J, perm=[2, 0, 1])

        # Pose rate: the updated velocity rotated by the heading.
        eta_dot = tf.matmul(J, upsilon)
        eta = tf.reshape(eta, [batch, 3, 1])
        eta = (self.train_dt) * eta_dot + eta  # Euler step

        # NOTE: the heading eta[:, 2] is not wrapped back into [-pi, pi]; it
        # can grow without bound over many steps.

        eta = tf.reshape(eta, [batch, 3])
        upsilon = tf.reshape(upsilon, [batch, 3])
        next_state = tf.concat([eta, upsilon], axis=1)
        reward = self.get_reward(next_state)
        return next_state, reward
def main(unused_argv):
    """Evaluate model checkpoints and report the collected metrics.

    Builds the data loader and the model under the "phero_model" variable
    scope, plus an optional second "gold" copy of the model (sharing the same
    variables) when either step-embedding quantization option is enabled.

    If `FLAGS.ckpt_fp` is None the function polls `FLAGS.train_dir` once per
    second forever; each time the latest checkpoint changes it is evaluated,
    the merged summaries are written to `FLAGS.eval_dir`, and a copy of the
    checkpoint is saved there. Otherwise the single checkpoint at
    `FLAGS.ckpt_fp` is evaluated and the mean of every metric is printed.

    Args:
        unused_argv: ignored.
    """
    if not tf.gfile.IsDirectory(FLAGS.eval_dir):
        tf.gfile.MakeDirs(FLAGS.eval_dir)

    cfg, _ = get_named_config(FLAGS.model_cfg, FLAGS.model_cfg_overrides)

    # Input pipeline: one pass over the data, no augmentation.
    with tf.name_scope("loader"):
        feat_dict = load_noteseqs(
            FLAGS.dataset_fp,
            cfg.eval_batch_size,
            cfg.eval_seq_len,
            max_discrete_times=cfg.data_max_discrete_times,
            max_discrete_velocities=cfg.data_max_discrete_velocities,
            augment_stretch_bounds=None,
            augment_transpose_bounds=None,
            randomize_chord_order=cfg.data_randomize_chord_order,
            repeat=False)

    # Model under evaluation.
    with tf.variable_scope("phero_model"):
        model_dict = build_genie_model(
            feat_dict,
            cfg,
            cfg.eval_batch_size,
            cfg.eval_seq_len,
            is_training=False)
    genie_vars = tf.get_collection(
        tf.GraphKeys.GLOBAL_VARIABLES, scope="phero_model")

    # Gold model: a batch-size-1 copy fed through placeholders, reusing the
    # variables created above.
    eval_gold = bool(cfg.stp_emb_vq or cfg.stp_emb_iq)
    if eval_gold:
        with tf.variable_scope("phero_model", reuse=True):
            gold_feat_dict = {
                "midi_pitches": tf.placeholder(tf.int32, [1, None]),
                "velocities": tf.placeholder(tf.int32, [1, None]),
                "delta_times_int": tf.placeholder(tf.int32, [1, None])
            }
            gold_seq_maxlen = gold.gold_longest()
            gold_seq_varlens = tf.placeholder(tf.int32, [1])
            gold_buttons = tf.placeholder(tf.int32, [1, None])
            gold_model_dict = build_genie_model(
                gold_feat_dict,
                cfg,
                1,
                gold_seq_maxlen,
                is_training=False,
                seq_varlens=gold_seq_varlens)

        discrete_key = ("stp_emb_vq_discrete"
                        if cfg.stp_emb_vq else "stp_emb_iq_discrete")
        gold_encodings = gold_model_dict[discrete_key]
        gold_mask = tf.sequence_mask(
            gold_seq_varlens, maxlen=gold_seq_maxlen, dtype=tf.float32)
        gold_diff = (tf.cast(gold_buttons, tf.float32) -
                     tf.cast(gold_encodings, tf.float32))

        def _masked_mean(values, weights):
            # Average of `values` weighted by `weights`.
            return tf.reduce_sum(values * weights) / tf.reduce_sum(weights)

        gold_diff_l2 = _masked_mean(tf.square(gold_diff), gold_mask)
        gold_diff_l1 = _masked_mean(tf.abs(gold_diff), gold_mask)

        gold_diff_l2_placeholder = tf.placeholder(tf.float32, [None])
        gold_diff_l1_placeholder = tf.placeholder(tf.float32, [None])

    # Per-batch tensors to collect, keyed by the summary name they feed.
    summary_name_to_batch_tensor = {}

    def _track(summary_name, model_key):
        summary_name_to_batch_tensor[summary_name] = model_dict[model_key]

    # Quantized step embeddings
    if cfg.stp_emb_vq:
        _track("codebook_perplexity", "stp_emb_vq_codebook_ppl")
        _track("loss_vqvae", "stp_emb_vq_loss")

    # Integer-quantized step embeddings
    if cfg.stp_emb_iq:
        _track("discrete_perplexity", "stp_emb_iq_discrete_ppl")
        _track("iq_valid_p", "stp_emb_iq_valid_p")
        _track("loss_iq_range", "stp_emb_iq_range_penalty")
        _track("loss_iq_contour", "stp_emb_iq_contour_penalty")
        _track("loss_iq_deviate", "stp_emb_iq_deviate_penalty")

    if cfg.stp_emb_vq or cfg.stp_emb_iq:
        _track("contour_violation", "contour_violation")
        _track("deviate_violation", "deviate_violation")

    # VAE sequence embeddings
    if cfg.seq_emb_vae:
        _track("loss_kl", "seq_emb_vae_kl")

    # Reconstruction loss and its exponential
    _track("loss_recons", "dec_recons_loss")
    summary_name_to_batch_tensor["ppl_recons"] = tf.exp(
        model_dict["dec_recons_loss"])
    if cfg.dec_pred_velocity:
        _track("loss_recons_velocity", "dec_recons_velocity_loss")
        summary_name_to_batch_tensor["ppl_recons_velocity"] = tf.exp(
            model_dict["dec_recons_velocity_loss"])

    # One placeholder per metric; the scalar summary is the mean of whatever
    # list of per-batch values gets fed into it.
    summary_ops = []
    summary_name_to_placeholder = {}
    for name in summary_name_to_batch_tensor:
        metric_placeholder = tf.placeholder(tf.float32, [None])
        summary_name_to_placeholder[name] = metric_placeholder
        summary_ops.append(
            tf.summary.scalar(name, tf.reduce_mean(metric_placeholder)))
    if eval_gold:
        gold_entries = (("gold_diff_l2", gold_diff_l2_placeholder),
                        ("gold_diff_l1", gold_diff_l1_placeholder))
        for gold_name, gold_placeholder in gold_entries:
            summary_name_to_placeholder[gold_name] = gold_placeholder
            summary_ops.append(
                tf.summary.scalar(gold_name,
                                  tf.reduce_mean(gold_placeholder)))

    merged_summaries = tf.summary.merge(summary_ops)
    summary_writer = tf.summary.FileWriter(FLAGS.eval_dir)

    # Saver covers the model variables and the global step.
    step = tf.train.get_or_create_global_step()
    saver = tf.train.Saver(genie_vars + [step], max_to_keep=None)

    def _eval_all(sess):
        """Gathers all metrics for a ckpt."""
        collected = collections.defaultdict(list)

        if eval_gold:
            for midi_notes, buttons, seq_varlen in gold.gold_iterator([-6, 6]):
                gold_feed = {
                    gold_feat_dict["midi_pitches"]: midi_notes,
                    gold_feat_dict["delta_times_int"]:
                        np.ones_like(midi_notes) * 8,
                    gold_seq_varlens: [seq_varlen],
                    gold_buttons: buttons,
                }
                gold_diff_l1_seq, gold_diff_l2_seq = sess.run(
                    [gold_diff_l1, gold_diff_l2], gold_feed)
                collected["gold_diff_l1"].append(gold_diff_l1_seq)
                collected["gold_diff_l2"].append(gold_diff_l2_seq)

        # Drain the loader until it signals the end of the data.
        while True:
            try:
                batch_values = sess.run(summary_name_to_batch_tensor)
            except tf.errors.OutOfRangeError:
                break

            for name, value in batch_values.items():
                collected[name].append(value)

        return collected

    # Eval
    if FLAGS.ckpt_fp is None:
        last_evaluated_fp = None
        while True:
            latest_ckpt_fp = tf.train.latest_checkpoint(FLAGS.train_dir)

            if latest_ckpt_fp != last_evaluated_fp:
                print("Eval: {}".format(latest_ckpt_fp))

                with tf.Session() as sess:
                    sess.run(tf.local_variables_initializer())
                    saver.restore(sess, latest_ckpt_fp)

                    metric_lists = _eval_all(sess)
                    summary_feed = {
                        summary_name_to_placeholder[name]: values
                        for name, values in metric_lists.items()
                    }
                    summary_proto, ckpt_step = sess.run(
                        [merged_summaries, step], feed_dict=summary_feed)
                    summary_writer.add_summary(summary_proto, ckpt_step)

                    saver.save(
                        sess,
                        os.path.join(FLAGS.eval_dir, "ckpt"),
                        global_step=ckpt_step)

                print("Done")
                last_evaluated_fp = latest_ckpt_fp

            time.sleep(1)
    else:
        with tf.Session() as sess:
            sess.run(tf.local_variables_initializer())
            saver.restore(sess, FLAGS.ckpt_fp)

            metric_lists = _eval_all(sess)
            ckpt_step = sess.run(step)

            print("-" * 80)
            print("Ckpt: {}".format(FLAGS.ckpt_fp))
            print("Step: {}".format(ckpt_step))
            for name in sorted(metric_lists):
                print("{}: {}".format(name, np.mean(metric_lists[name])))
def create_dual_ibp_approx(num_layers,
                           batch_size,
                           action_max,
                           W_T_list,
                           b_T_list,
                           action_tensor_center,
                           return_full_info=False):
    """Build the dual objective over an input box, using interval bounds.

    The input is constrained to the box
    `action_tensor_center +/- action_max`. Pre-activation bounds for layers
    2..K-1 (K = `num_layers`) are propagated forward with interval
    arithmetic; the dual variables `Nu_i` are then formed by a backward pass.

    Args:
        num_layers: K, the layer count that drives both loops.
        batch_size: first dimension of the zero-initialised `Nu_i` tensors.
        action_max: half-width of the input box.
        W_T_list: per-layer weight matrices, applied as `tf.matmul(x, W)`.
        b_T_list: per-layer bias tensors.
        action_tensor_center: centre of the input box.
        return_full_info: when True, also return the intermediate tensors.

    Returns:
        `-J_tilde`, or when `return_full_info` is True the tuple
        `(-J_tilde, l_list, u_list, D_list, Nu_list, lv_sum, gamma_list,
        psi, Nu_hat_1)`.
    """
    # Lower / upper bounds (l_i, u_i) for i = 1,...,K-1; entry 0 is the box.
    box_radius = action_max * tf.ones_like(action_tensor_center)
    l_list = [action_tensor_center - box_radius]
    u_list = [action_tensor_center + box_radius]

    # Per-layer matrices D_i and indicator tensors I_i / Ip_i as returned by
    # `dual_method.get_D` / `dual_method.get_I`; entry 0 is a zero
    # placeholder for the input layer.
    D_list = [tf.zeros_like(action_tensor_center)]
    I_list = [tf.zeros_like(action_tensor_center)]
    Ip_list = [tf.zeros_like(action_tensor_center)]

    # Duals nu_i for i = 1,...,K-1, zero-initialised.
    nu_width = W_T_list[0].get_shape().as_list()[1]
    Nu_list = [
        tf.zeros([batch_size, nu_width, 1]) for _ in range(num_layers - 1)
    ]

    # Nu_K
    Nu_K = -tf.expand_dims(-tf.eye(1), axis=-1)

    # b_i' * nu_{i+1} for i = 1,...,K-1 (starts out as the raw biases).
    gamma_list = [b_T_list[i] for i in range(num_layers - 1)]

    # ---- Forward pass: bounds for layers i = 2,...,K-1 ----
    for i in range(2, num_layers):
        weights = W_T_list[i - 2]
        bias = b_T_list[i - 2]

        # Midpoint and half-width of the previous layer's interval.
        midpoint = 0.5 * (l_list[-1] + u_list[-1])
        half_width = 0.5 * (u_list[-1] - l_list[-1])

        mid_out = tf.matmul(midpoint, weights)
        spread_out = tf.matmul(half_width, tf.abs(weights))

        l_list.append(mid_out - spread_out + bias)
        u_list.append(mid_out + spread_out + bias)

        Ip_i, I_i = dual_method.get_I(l_list[-1], u_list[-1])
        I_list.append(I_i)
        Ip_list.append(Ip_i)

        D_list.append(dual_method.get_D(l_list[-1], u_list[-1], Ip_i, I_i))

    # ---- Backward pass: form Nu_i ----

    # Nu_{K-1} and gamma_{K-1}
    Nu_list[-1] = tf.einsum('ij,jk->ijk', D_list[-1], W_T_list[-1])
    Nu_K = tf.tile(Nu_K, [Nu_list[-1].get_shape().as_list()[0], 1, 1])
    Nu_list[-1] = tf.einsum('ijk,ikm->ijm', Nu_list[-1], Nu_K)

    gamma_list[-1] = tf.einsum('ij,ijm->im', gamma_list[-1], Nu_K)

    # Running sum of l_i * [nu_i]_+ over the entries selected by I_i.
    lv_sum = tf.einsum('ij,ijm->im', l_list[-1] * I_list[-1],
                       tf.nn.relu(Nu_list[-1]))

    # Nu_j and gamma_j for layers j = K-2,...,2
    for j in range(num_layers - 2, 1, -1):
        Nu_hat_j = tf.einsum('jk,ikm->ijm', W_T_list[j - 1], Nu_list[j])

        gamma_list[j - 1] = tf.einsum('ij,ijm->im', b_T_list[j - 1],
                                      Nu_list[j])

        Nu_list[j - 1] = tf.einsum('ij,ijk->ijk', D_list[j - 1], Nu_hat_j)

        layer_term = tf.einsum('ij,ijm->im', l_list[j - 1] * I_list[j - 1],
                               tf.nn.relu(Nu_list[j - 1]))
        lv_sum = tf.add(lv_sum, layer_term)

    # nu_hat_1 and gamma_1
    Nu_hat_1 = tf.einsum('jk,ikm->ijm', W_T_list[0], Nu_list[1])
    gamma_list[0] = tf.einsum('ij,ijm->im', b_T_list[0], Nu_list[1])

    # J_tilde
    psi = tf.einsum('ij,ijm->im', action_tensor_center,
                    Nu_hat_1) + tf.add_n(gamma_list)
    Nu_hat_1_norm = tf.norm(Nu_hat_1, 1, axis=1, keepdims=False)
    J_tilde = -psi - action_max * Nu_hat_1_norm + lv_sum

    if not return_full_info:
        return -J_tilde
    return (-J_tilde, l_list, u_list, D_list, Nu_list, lv_sum, gamma_list,
            psi, Nu_hat_1)
示例#27
0
  def build_train_graph(self,
                        inputs,
                        min_depth,
                        max_depth,
                        num_mpi_planes,
                        learning_rate=0.0002,
                        beta1=0.9,
                        vgg_model_file=None,
                        global_step=0):
    """Construct the training computation graph.

    The graph randomly picks a (spatial divisor, plane count) pair, downsizes
    the input images accordingly, infers an MPI, renders the target view from
    both the initial and the refined MPI, and trains on the sum of the two
    masked VGG losses with Adam. Image and scalar summaries are registered
    along the way.

    Args:
      inputs: dictionary of tensors (see 'input_data' below) needed for
        training. NOTE: its "intrinsics" entry is overwritten in place with
        the rescaled intrinsics.
      min_depth: minimum depth for the PSV and MPI planes
      max_depth: maximum depth for the PSV and MPI planes
      num_mpi_planes: number of MPI planes to infer. NOTE: the passed value is
        not used; it is overwritten by the randomly chosen plane count below.
      learning_rate: learning rate
      beta1: hyperparameter for Adam
      vgg_model_file: path to vgg weights (needed when vgg loss is used)
      global_step: current optimization step. NOTE: not referenced in this
        method body.
    Returns:
      A single-element list holding the train op (the Adam `apply_gradients`
      op).
    """
    print("starting to build graph")
    with tf.name_scope("input_size_randomization"):
      # Each row is [spatial divisor, number of MPI planes]; one row is drawn
      # at random per graph execution.
      dim_choices = tf.constant([[1, 16], [2, 32], [4, 32], [4, 64], [4, 128],
                                 [8, 32], [8, 64], [8, 128]],
                                dtype=tf.int32)
      rand_dim = tf.random_shuffle(dim_choices)[0, :]
      # Height and width share the same divisor.
      height_div = rand_dim[0]
      width_div = rand_dim[0]
      num_mpi_planes = rand_dim[1]
      tf.summary.scalar("num_mpi_planes", num_mpi_planes)

    with tf.name_scope("setup"):
      mpi_planes = self.inv_depths(min_depth, max_depth, num_mpi_planes)

    with tf.name_scope("input_data"):
      raw_tgt_image = inputs["tgt_image"]
      raw_ref_image = inputs["ref_image"]
      raw_src_images = inputs["src_images"]

      # Static image size divided by the random divisor gives the (dynamic)
      # working resolution.
      _, img_height, img_width, _ = raw_src_images.get_shape().as_list(
      )
      img_height = img_height // height_div
      img_width = img_width // width_div

      raw_tgt_image = tf.image.convert_image_dtype(
          raw_tgt_image, dtype=tf.float32)
      raw_ref_image = tf.image.convert_image_dtype(
          raw_ref_image, dtype=tf.float32)
      raw_src_images = tf.image.convert_image_dtype(
          raw_src_images, dtype=tf.float32)
      raw_tgt_image = tf.image.resize_area(raw_tgt_image,
                                           [img_height, img_width])
      raw_ref_image = tf.image.resize_area(raw_ref_image,
                                           [img_height, img_width])
      raw_src_images = tf.image.resize_area(raw_src_images,
                                            [img_height, img_width])

      tgt_pose = inputs["tgt_pose"]
      ref_pose = inputs["ref_pose"]
      src_poses = inputs["src_poses"]
      intrinsics = inputs["intrinsics"]

      # Scale intrinsics based on size randomization: rows 0 and 1 are
      # divided by the width / height divisor, row 2 is kept unchanged.
      intrinsics = tf.concat([
          intrinsics[:, 0:1, :] / tf.to_float(width_div),
          intrinsics[:, 1:2, :] / tf.to_float(height_div), intrinsics[:, 2:3, :]
      ],
                             axis=1)
      inputs["intrinsics"] = intrinsics

      _, num_source, _, _ = src_poses.get_shape().as_list()

    with tf.name_scope("inference"):
      print("setting up MPI inference")
      # Re-derive the plane count from the actual plane tensor.
      num_mpi_planes = tf.shape(mpi_planes)[0]
      pred = self.infer_mpi(raw_src_images, raw_ref_image, ref_pose, src_poses,
                            intrinsics, num_mpi_planes,
                            mpi_planes)
      rgba_layers = pred["rgba_layers"]
      rgba_layers_refine = pred["rgba_layers_refine"]
      stuff_behind = pred["stuff_behind"]
      refine_input_mpi = pred["refine_input_mpi"]
      psv = pred["psv"]

    with tf.name_scope("synthesis"):
      print("setting up rendering")
      # Relative pose: target pose composed with the inverse reference pose.
      rel_pose = tf.matmul(tgt_pose, tf.matrix_inverse(ref_pose))
      output_image, output_layers = self.mpi_render_view(
          rgba_layers, rel_pose, mpi_planes, intrinsics)
      output_alpha = output_layers[Ellipsis, -1]
      output_image_refine, _ = self.mpi_render_view(
          rgba_layers_refine, rel_pose, mpi_planes, intrinsics)

    with tf.name_scope("loss"):
      print("computing losses")
      # Mask loss for pixels outside reference frustum: a pixel is masked out
      # (0) when at least one rendered layer sums to exactly zero across its
      # channels there, and kept (1) otherwise.
      loss_mask = tf.where(
          tf.equal(
              tf.reduce_min(
                  tf.abs(tf.reduce_sum(output_layers, axis=-1)),
                  axis=3,
                  keep_dims=True), 0.0),
          tf.zeros_like(output_alpha[:, :, :, 0:1]),
          tf.ones_like(output_alpha[:, :, :, 0:1]))
      loss_mask = tf.stop_gradient(loss_mask)
      tf.summary.image("loss_mask", loss_mask)

      # Helper functions for loss
      def compute_error(real, fake, mask):
        # Mean of the masked absolute difference.
        return tf.reduce_mean(mask * tf.abs(fake - real))

      # Normalized VGG loss (from
      # https://github.com/CQFIO/PhotographicImageSynthesis)

      # Average-pools the mask by a factor `ds` so it can be applied to the
      # lower-resolution feature maps.
      downsample = lambda tensor, ds: tf.nn.avg_pool(tensor, [1, ds, ds, 1],
                                                     [1, ds, ds, 1], "SAME")

      def vgg_loss(raw_tgt_image, output_image, loss_mask):
        """Compute VGG loss."""

        vgg_real = build_vgg19(raw_tgt_image * 255.0, vgg_model_file)
        # NOTE(review): this rescaling presumes the rendered image lies in
        # [-1, 1] while the target lies in [0, 1] -- confirm with the
        # renderer.
        rescaled_output_image = (output_image + 1.)/2. * 255.0
        vgg_fake = build_vgg19(
            rescaled_output_image, vgg_model_file, reuse=True)
        # Per-layer masked L1 errors, each scaled by a fixed constant.
        p0 = compute_error(vgg_real["input"], vgg_fake["input"], loss_mask)
        p1 = compute_error(vgg_real["conv1_2"],
                           vgg_fake["conv1_2"],
                           loss_mask)/2.6
        p2 = compute_error(vgg_real["conv2_2"],
                           vgg_fake["conv2_2"],
                           downsample(loss_mask, 2))/4.8
        p3 = compute_error(vgg_real["conv3_2"],
                           vgg_fake["conv3_2"],
                           downsample(loss_mask, 4))/3.7
        p4 = compute_error(vgg_real["conv4_2"],
                           vgg_fake["conv4_2"],
                           downsample(loss_mask, 8))/5.6
        p5 = compute_error(vgg_real["conv5_2"],
                           vgg_fake["conv5_2"],
                           downsample(loss_mask, 16))*10/1.5
        total_loss = p0+p1+p2+p3+p4+p5
        return total_loss, vgg_real, vgg_fake

      # Total loss = VGG loss of the initial render + VGG loss of the refined
      # render.
      vgg_loss_initial, _, _ = vgg_loss(raw_tgt_image, output_image, loss_mask)
      tf.summary.scalar("vgg_loss_initial", vgg_loss_initial)
      total_loss = vgg_loss_initial

      vgg_loss_refine, _, _ = vgg_loss(raw_tgt_image, output_image_refine,
                                       loss_mask)
      tf.summary.scalar("vgg_loss_refine", vgg_loss_refine)
      total_loss += vgg_loss_refine

    with tf.name_scope("train_op"):
      print("setting up train op")
      train_vars = [var for var in tf.trainable_variables()]
      optim = tf.train.AdamOptimizer(learning_rate, beta1)
      grads_and_vars = optim.compute_gradients(total_loss, var_list=train_vars)
      # Wrapped in a list; callers receive [apply_gradients_op].
      train_op = [optim.apply_gradients(grads_and_vars)]

    # Summaries
    tf.summary.scalar("total_loss", total_loss)
    # Source images: each source occupies 3 consecutive channels.
    for i in range(num_source):
      src_image = raw_src_images[:, :, :, i*3:(i+1)*3]
      tf.summary.image("src_image_%d" % i, src_image)
    # Output image
    tf.summary.image("output_image", self.deprocess_image(output_image))
    # Refined output image
    tf.summary.image("output_image_refine",
                     self.deprocess_image(output_image_refine))
    # Target image
    tf.summary.image("tgt_image", raw_tgt_image)
    # Ref image
    tf.summary.image("ref_image", raw_ref_image)
    # Predicted color and alpha layers, and PSV
    num_summ = 16  # Number of plane summaries to show in tensorboard
    for i in range(num_summ):
      # Evenly spaced plane index across the (dynamic) number of planes.
      ind = tf.to_int32(i * num_mpi_planes/num_summ)
      rgb = rgba_layers[:, :, :, ind, :3]
      alpha = rgba_layers[:, :, :, ind, -1:]
      ref_plane = psv[:, :, :, ind, 3:6]
      source_plane = psv[:, :, :, ind, :3]
      output_rgb = output_layers[:, :, :, ind, :3]
      tf.summary.image("rgb_layer_%d" % i, self.deprocess_image(rgb))
      tf.summary.image("alpha_layer_%d" % i, alpha)
      tf.summary.image("rgba_layer_%d" % i, self.deprocess_image(rgb * alpha))
      tf.summary.image("psv_avg_%d" % i,
                       (self.deprocess_image(0.5*ref_plane + 0.5*source_plane)))
      tf.summary.image("output_rgb_%d" % i,
                       self.deprocess_image(output_rgb))
      tf.summary.image("psv_ref_%d" % i, self.deprocess_image(ref_plane))
      tf.summary.image("psv_source_%d" % i, self.deprocess_image(source_plane))

    # Cumulative rendered images and refined MPI
    for i in range(num_summ):
      ind = tf.to_int32(i * num_mpi_planes/num_summ)
      rgb = rgba_layers_refine[:, :, :, ind, :3]
      alpha = rgba_layers_refine[:, :, :, ind, 3:]
      render = stuff_behind[:, :, :, ind, :3]
      input_colors = refine_input_mpi[:, :, :, ind, :3]
      tf.summary.image("rgb_layer_refine_%d" % i, self.deprocess_image(rgb))
      tf.summary.image("alpha_layer_refine_%d" % i, alpha)
      tf.summary.image("rgba_layer_refine_%d" % i,
                       self.deprocess_image(rgb * alpha))
      tf.summary.image("cumulative_render_%d" % i, self.deprocess_image(render))
      tf.summary.image("input_colors_refine_%d" % i,
                       self.deprocess_image(input_colors))

    return train_op
示例#28
0
 def mask(target, mask):
     """Let gradients reach `target` only through the `mask`-weighted part.

     The forward value is |mask - 1| * target + mask * target; the first
     term is wrapped in `tf.stop_gradient`, so back-propagation only sees
     `mask * target`.
     """
     inverse_mask = tf.abs(mask - 1)
     gradient_blocked = tf.stop_gradient(inverse_mask * target)
     gradient_passed = mask * target
     return gradient_blocked + gradient_passed
示例#29
0
  def _compute_inner_update_scinol(self, var, grad, state):
    """Builds the ops for one inner betting-fraction update of `var`.

    The gradient is clipped to the previously recorded maximum gradient,
    zeroed where it would push an already out-of-domain betting fraction
    further out, and then accumulated into the reward / gradient-sum slots.
    A new betting fraction is computed from those accumulators and assigned
    to its slot.

    Args:
      var: variable being optimized; used to look up slots and for its dtype
        and name.
      grad: gradient tensor for `var`.
      state: optimizer state object exposing `get_hyper` and `get_slot`.

    Returns:
      A tuple `(clipped_betting_fraction, update_op)`: the updated betting
      fraction clipped to `[-betting_domain, betting_domain]`, and a grouped
      op that runs every slot update created here.
    """
    update_ops = []

    betting_domain = tf.cast(
        state.get_hyper(BETTING_DOMAIN), var.dtype.base_dtype)

    reward = state.get_slot(var, INNER_REWARD)
    # NOTE(review): the betting fraction lives in the OUTER_BETTING_FRACTION
    # slot while every other slot here is INNER_*; presumably intentional --
    # confirm against the slot definitions.
    betting_fraction = state.get_slot(var, OUTER_BETTING_FRACTION)
    sum_grad_squared = state.get_slot(var, INNER_SUM_GRAD_SQUARED)
    sum_grad = state.get_slot(var, INNER_SUM_GRAD)
    inner_maximum_gradient = state.get_slot(var, INNER_MAXIMUM_GRADIENT)

    # clip inner gradient to respect previous inner_maximum_gradient value
    # This introduces at most an additive constant overhead in the regret
    # since the inner betting fraction lies in a bounded domain.
    clipped_grad = tf.clip_by_value(grad, -inner_maximum_gradient,
                                    inner_maximum_gradient)

    # Only update the running maximum after the clip above has read the
    # previous value.
    with tf.control_dependencies([clipped_grad]):
      inner_maximum_gradient_updated = self._assign(
          inner_maximum_gradient,
          tf.maximum(inner_maximum_gradient, tf.abs(grad)))
      update_ops.append(inner_maximum_gradient_updated)

    clipped_old_betting_fraction = tf.clip_by_value(betting_fraction,
                                                    -betting_domain,
                                                    betting_domain)

    # Process grad to respect truncation to [-betting_domain, betting_domain]
    # zeros_like keeps the dtype of clipped_grad, so tf.where also works for
    # variables that are not float32 (tf.zeros would default to float32).
    truncated_grad = tf.where(
        tf.greater_equal(
            clipped_grad * (betting_fraction - clipped_old_betting_fraction),
            0.0), clipped_grad, tf.zeros_like(clipped_grad))

    reward_delta = -betting_fraction * truncated_grad
    reward_updated = self._assign_add(reward, reward_delta)
    update_ops.append(reward_updated)

    sum_grad_squared_updated = self._assign_add(sum_grad_squared,
                                                tf.square(truncated_grad))
    update_ops.append(sum_grad_squared_updated)

    sum_grad_updated = self._assign_add(sum_grad, truncated_grad)
    update_ops.append(sum_grad_updated)

    # The second term in this minimum, self.eta / inner_maximum_gradient_updated,
    # is a hack to force the betting fraction to not be too big at first.
    scaling = tf.minimum(
        tf.rsqrt(sum_grad_squared_updated +
                 tf.square(inner_maximum_gradient_updated)),
        self.eta / inner_maximum_gradient_updated)
    theta = -sum_grad_updated * scaling

    # rescale inner flag is a hack that rescales the epsilon_v by the
    # maximum inner gradient.
    if self.rescale_inner:
      epsilon_scaling = inner_maximum_gradient_updated
    else:
      epsilon_scaling = 1.0

    inner_betting_fraction = tf.sign(theta) * tf.minimum(tf.abs(theta),
                                                         1.0) * scaling / 2.0
    new_betting_fraction = inner_betting_fraction * (
        reward_updated + epsilon_scaling * self.epsilon_v)

    betting_fraction_updated = self._assign(betting_fraction,
                                            new_betting_fraction)
    update_ops.append(betting_fraction_updated)

    clipped_betting_fraction = tf.clip_by_value(betting_fraction_updated,
                                                -betting_domain, betting_domain)

    if self.output_summaries:
      mean_unclipped_betting_fraction_summary = tf.reduce_mean(
          tf.abs(betting_fraction_updated))
      max_unclipped_betting_fraction_summary = tf.reduce_max(
          tf.abs(betting_fraction_updated))

      mean_clipped_betting_fraction_summary = tf.reduce_mean(
          tf.abs(clipped_betting_fraction))
      max_clipped_betting_fraction_summary = tf.reduce_max(
          tf.abs(clipped_betting_fraction))

      max_abs_gradient = tf.reduce_max(tf.abs(grad))
      max_truncated_grad = tf.reduce_max(tf.abs(truncated_grad))

      tf.summary.scalar(self._name + "/mean_unclipped_bet/" + var.name,
                        mean_unclipped_betting_fraction_summary)
      tf.summary.scalar(self._name + "/max_unclipped_bet/" + var.name,
                        max_unclipped_betting_fraction_summary)
      tf.summary.scalar(self._name + "/mean_clipped_bet/" + var.name,
                        mean_clipped_betting_fraction_summary)
      tf.summary.scalar(self._name + "/max_clipped_bet/" + var.name,
                        max_clipped_betting_fraction_summary)

      tf.summary.scalar(self._name + "/max_abs_inner_grad/" + var.name,
                        max_abs_gradient)
      tf.summary.scalar(
          self._name + "/max_abs_truncated_inner_grad/" + var.name,
          max_truncated_grad)
    return clipped_betting_fraction, tf.group(*update_ops)
示例#30
0
    def _build_train_op(self):
        """Builds a training op.

        Computes the quantile Huber loss between the (gradient-stopped) target
        quantile values and the quantile values of the replayed actions, and
        minimizes its batch mean with `self.optimizer`.

        Returns:
          A tuple `(train_op, mean_loss)`: the op returned by
          `self.optimizer.minimize` and the batch-mean quantile Huber loss
          tensor it minimizes.
        """
        batch_size = tf.shape(self._replay.rewards)[0]

        target_quantile_values = tf.stop_gradient(
            self._build_target_quantile_values_op())
        # Reshape to self.num_tau_prime_samples x batch_size x 1 since this is
        # the manner in which the target_quantile_values are tiled.
        target_quantile_values = tf.reshape(
            target_quantile_values,
            [self.num_tau_prime_samples, batch_size, 1])
        # Transpose dimensions so that the dimensionality is batch_size x
        # self.num_tau_prime_samples x 1 to prepare for computation of
        # Bellman errors.
        # Final shape of target_quantile_values:
        # batch_size x num_tau_prime_samples x 1.
        target_quantile_values = tf.transpose(target_quantile_values,
                                              [1, 0, 2])

        # Shape of indices: (num_tau_samples x batch_size) x 1.
        # Expand dimension by one so that it can be used to index into all the
        # quantiles when using the tf.gather_nd function (see below).
        indices = tf.range(self.num_tau_samples * batch_size)[:, None]

        # Add a trailing dimension to the actions and repeat them once per tau
        # sample so that they line up row-for-row with `indices`.
        reshaped_actions = self._replay.actions[:, None]
        reshaped_actions = tf.tile(reshaped_actions, [self.num_tau_samples, 1])
        # Shape of reshaped_actions: (num_tau_samples x batch_size) x 2.
        reshaped_actions = tf.concat([indices, reshaped_actions], axis=1)

        chosen_action_quantile_values = tf.gather_nd(
            self._replay_net_quantile_values, reshaped_actions)
        # Reshape to self.num_tau_samples x batch_size x 1 since this is the manner
        # in which the quantile values are tiled.
        chosen_action_quantile_values = tf.reshape(
            chosen_action_quantile_values,
            [self.num_tau_samples, batch_size, 1])
        # Transpose dimensions so that the dimensionality is batch_size x
        # self.num_tau_samples x 1 to prepare for computation of
        # Bellman errors.
        # Final shape of chosen_action_quantile_values:
        # batch_size x num_tau_samples x 1.
        chosen_action_quantile_values = tf.transpose(
            chosen_action_quantile_values, [1, 0, 2])

        # Shape of bellman_errors and huber_loss:
        # batch_size x num_tau_prime_samples x num_tau_samples x 1.
        bellman_errors = (target_quantile_values[:, :, None, :] -
                          chosen_action_quantile_values[:, None, :, :])
        # The huber loss (see Section 2.3 of the paper) is defined via two cases:
        # case_one: |bellman_errors| <= kappa
        # case_two: |bellman_errors| > kappa
        huber_loss_case_one = (
            tf.cast(tf.abs(bellman_errors) <= self.kappa, tf.float32) * 0.5 *
            bellman_errors**2)
        huber_loss_case_two = (
            tf.cast(tf.abs(bellman_errors) > self.kappa, tf.float32) *
            self.kappa * (tf.abs(bellman_errors) - 0.5 * self.kappa))
        huber_loss = huber_loss_case_one + huber_loss_case_two

        # Reshape replay_quantiles to batch_size x num_tau_samples x 1
        replay_quantiles = tf.reshape(self._replay_net_quantiles,
                                      [self.num_tau_samples, batch_size, 1])
        replay_quantiles = tf.transpose(replay_quantiles, [1, 0, 2])

        # Tile by num_tau_prime_samples along a new dimension. Shape is now
        # batch_size x num_tau_prime_samples x num_tau_samples x 1.
        # These quantiles will be used for computation of the quantile huber loss
        # below (see section 2.3 of the paper).
        replay_quantiles = tf.cast(
            tf.tile(replay_quantiles[:, None, :, :],
                    [1, self.num_tau_prime_samples, 1, 1]), tf.float32)
        # Shape: batch_size x num_tau_prime_samples x num_tau_samples x 1.
        quantile_huber_loss = (
            tf.abs(replay_quantiles -
                   tf.stop_gradient(tf.cast(bellman_errors < 0, tf.float32))) *
            huber_loss) / self.kappa
        # Sum over current quantile value (num_tau_samples) dimension,
        # average over target quantile value (num_tau_prime_samples) dimension.
        # Shape: batch_size x num_tau_prime_samples x 1.
        loss = tf.reduce_sum(quantile_huber_loss, axis=2)
        # Shape: batch_size x 1.
        loss = tf.reduce_mean(loss, axis=1)

        # Placeholder dependency: a no-op stands where a priority update op
        # would otherwise gate the training step.
        update_priorities_op = tf.no_op()
        with tf.control_dependencies([update_priorities_op]):
            # Build the batch-mean loss once and share it between the summary,
            # the optimizer and the returned value.
            mean_loss = tf.reduce_mean(loss)
            if self.summary_writer is not None:
                with tf.variable_scope('Losses'):
                    tf.summary.scalar('QuantileLoss', mean_loss)
            return self.optimizer.minimize(mean_loss), mean_loss