Example #1
    def __init__(self, brain, h_size=128, lr=1e-4, n_layers=2, m_size=128,
                 normalize=False, use_recurrent=False):
        LearningModel.__init__(self, m_size, normalize, use_recurrent, brain)

        num_streams = 1
        hidden_streams = self.create_new_obs(num_streams, h_size, n_layers)
        hidden = hidden_streams[0]
        self.dropout_rate = tf.placeholder(dtype=tf.float32, shape=[], name="dropout_rate")
        hidden_reg = tf.layers.dropout(hidden, self.dropout_rate)
        if self.use_recurrent:
            self.memory_in = tf.placeholder(shape=[None, self.m_size], dtype=tf.float32, name='recurrent_in')
            hidden_reg, self.memory_out = self.create_recurrent_encoder(hidden_reg, self.memory_in)
            self.memory_out = tf.identity(self.memory_out, name='recurrent_out')
        self.policy = tf.layers.dense(hidden_reg, self.a_size, activation=None, use_bias=False,
                                      kernel_initializer=c_layers.variance_scaling_initializer(factor=0.01))

        if brain.vector_action_space_type == "discrete":
            self.action_probs = tf.nn.softmax(self.policy)
            self.sample_action_float = tf.multinomial(self.policy, 1)
            self.sample_action_float = tf.identity(self.sample_action_float, name="action")
            self.sample_action = tf.cast(self.sample_action_float, tf.int32)
            self.true_action = tf.placeholder(shape=[None], dtype=tf.int32, name="teacher_action")
            self.action_oh = tf.one_hot(self.true_action, self.a_size)
            self.loss = tf.reduce_sum(-tf.log(self.action_probs + 1e-10) * self.action_oh)
            self.action_percent = tf.reduce_mean(tf.cast(
                tf.equal(tf.cast(tf.argmax(self.action_probs, axis=1), tf.int32), self.sample_action), tf.float32))
        else:
            self.sample_action = tf.identity(self.policy, name="action")
            self.true_action = tf.placeholder(shape=[None, self.a_size], dtype=tf.float32, name="teacher_action")
            self.loss = tf.reduce_sum(tf.squared_difference(self.true_action, self.sample_action))

        optimizer = tf.train.AdamOptimizer(learning_rate=lr)
        self.update = optimizer.minimize(self.loss)
Example #2
        def _g_recurrence_2(i, x_t, gen_x, h_tm1, h_tm1_manager, last_goal, real_goal):
            # with tf.device('/cpu:0'):
            cur_sen = tf.cond(
                i > 0,
                lambda: tf.split(
                    tf.concat([tf.transpose(gen_x.stack(), perm=[1, 0]), self.padding_array], 1),
                    [self.sequence_length, i - 1], 1)[0],
                lambda: self.padding_array)
            with tf.variable_scope(self.scope):
                feature = self.FeatureExtractor_unit(cur_sen, self.drop_out)
            h_t_Worker = self.g_worker_recurrent_unit(x_t, h_tm1)  # hidden_memory_tuple
            o_t_Worker = self.g_worker_output_unit(h_t_Worker)  # batch x vocab , logits not prob

            o_t_Worker = tf.reshape(o_t_Worker, [self.batch_size, self.num_vocabulary, self.goal_size])

            h_t_manager = self.g_manager_recurrent_unit(feature, h_tm1_manager)
            sub_goal = self.g_manager_output_unit(h_t_manager)
            sub_goal = tf.nn.l2_normalize(sub_goal, 1)

            real_sub_goal = tf.add(last_goal, sub_goal)
            w_g = tf.matmul(real_goal, self.g_change)  # batch x goal_size
            w_g = tf.nn.l2_normalize(w_g, 1)
            w_g = tf.expand_dims(w_g, 2)  # batch x goal_size x 1

            x_logits = tf.matmul(o_t_Worker, w_g)
            x_logits = tf.squeeze(x_logits)

            log_prob = tf.log(tf.nn.softmax(x_logits))
            next_token = tf.cast(tf.reshape(tf.multinomial(log_prob, 1), [self.batch_size]), tf.int32)
            x_tp1 = tf.nn.embedding_lookup(self.g_embeddings, next_token)  # batch x emb_dim
            with tf.control_dependencies([cur_sen]):
                gen_x = gen_x.write(i - 1, next_token)  # indices, batch_size
            return i + 1, x_tp1, gen_x, h_t_Worker, h_t_manager, \
                   tf.cond(((i) % self.step_size) > 0, lambda: real_sub_goal,
                           lambda: tf.constant(0.0, shape=[self.batch_size, self.goal_out_size])), \
                   tf.cond(((i) % self.step_size) > 0, lambda: real_goal, lambda: real_sub_goal)
Example #3
 def testSmallEntropy(self):
   with self.test_session(use_gpu=self.use_gpu):
     # A logit value of -10 corresponds to a probability of ~5e-5.
     logits = tf.constant([[-10., 10., -10.], [-10., -10., 10.]])
     num_samples = 1000
     samples = tf.multinomial(logits, num_samples).eval()
     self.assertAllEqual([[1] * num_samples, [2] * num_samples], samples)
Example #4
 def body(i, prev_base_state, prev_high_states, prev_y, prev_emb,
          y_array):
     state1 = decoder.grustep1.forward(prev_base_state, prev_emb)
     att_ctx = decoder.attstep.forward(state1)
     base_state = decoder.grustep2.forward(state1, att_ctx)
     if decoder.high_gru_stack == None:
         output = base_state
         high_states = []
     else:
         if decoder.high_gru_stack.context_state_size == 0:
             output, high_states = decoder.high_gru_stack.forward_single(
                 prev_high_states, base_state)
         else:
             output, high_states = decoder.high_gru_stack.forward_single(
                 prev_high_states, base_state, context=att_ctx)
     logits = decoder.predictor.get_logits(prev_emb, output, att_ctx,
                                        multi_step=False)
     new_y = tf.multinomial(logits, num_samples=1)
     new_y = tf.cast(new_y, dtype=tf.int32)
     new_y = tf.squeeze(new_y, axis=1)
     new_y = tf.where(tf.equal(prev_y, tf.constant(0, dtype=tf.int32)),
                      tf.zeros_like(new_y), new_y)
     y_array = y_array.write(index=i, value=new_y)
     new_emb = decoder.y_emb_layer.forward(new_y, factor=0)
     return i+1, base_state, high_states, new_y, new_emb, y_array
Example #5
 def testEmpty(self):
   classes = 5
   with self.test_session(use_gpu=self.use_gpu):
     for batch in 0, 3:
       for samples in 0, 7:
         x = tf.multinomial(tf.zeros([batch, classes]), samples).eval()
         self.assertEqual(x.shape, (batch, samples))
Example #6
File: model.py  Project: dbolshak/DPGAN
  def loop_function(prev,_):

    prev = tf.nn.xw_plus_b(
          prev, output_projection[0], output_projection[1])
    prev_symbol = tf.cast(tf.reshape(tf.multinomial(prev, 1), [FLAGS.batch_size*FLAGS.max_dec_sen_num]), tf.int32)
    emb_prev = tf.nn.embedding_lookup(embedding, prev_symbol)
    return emb_prev
Example #7
  def UpdateProbs(self, inp):
    """Update probabilities of each particle based on 2D matrix inp which is a 2D perspectiuve projection of the scene"""

    projection, onscreen = self.project()
    filtered_projection = tf.to_int64(tf.select(onscreen, projection, tf.zeros_like(projection)))
    per_state_probabilities = tf.gather_nd(inp, filtered_projection)
    
    filtered_probabilities = tf.select(onscreen, per_state_probabilities, tf.zeros_like(per_state_probabilities))
    
    new_state_indicies = tf.squeeze(tf.multinomial(tf.expand_dims(tf.log(filtered_probabilities),0), self.particles/10*9))
    
    new_state = tf.gather(self.state, new_state_indicies)
    
    # Add momentum
    new_state = tf.concat(1, [new_state[:, 0:3] + new_state[:, 3:6], new_state[:, 3:10]])
    
    # Add in particles for the "just come onscreen" case.
    new_state = tf.concat(0, [new_state, tf.random_normal([self.particles/10, 10]) * self.initial_std + self.initial_bias])

    
    new_state = new_state + tf.random_normal([self.particles, 10]) * self.update_std
    # Todo:  permute state by adding noise.

    
    return self.state.assign(new_state)
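The snippet above uses pre-1.0 TensorFlow ops (tf.select, axis-first tf.concat). A self-contained sketch of the same resampling step in TF 1.x style, with hypothetical particle counts:

import tensorflow as tf

# Hypothetical sizes; the point is that tf.multinomial draws particle indices in
# proportion to the (log of the) per-particle probabilities, and tf.gather then
# rebuilds the resampled state.
num_particles = 100
probabilities = tf.placeholder(tf.float32, [num_particles], name="particle_probs")
state = tf.placeholder(tf.float32, [num_particles, 10], name="particle_state")

keep = num_particles // 10 * 9  # resample 90% of the particles, as above
idx = tf.multinomial(tf.log(probabilities + 1e-12)[tf.newaxis, :], keep)  # [1, keep]
resampled = tf.gather(state, tf.squeeze(idx, axis=0))                     # [keep, 10]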
Example #8
  def call(self, inputs):
    """Calculates logits and action.

    Args:
      inputs: Observations from a step in the cart-pole environment, of shape
        `(batch_size, input_size)`

    Returns:
      logits: the logits output by the output layer. This can be viewed as the
        likelihood values of choosing the left (0) action. Shape:
        `(batch_size, 1)`.
      actions: randomly selected actions ({0, 1}) based on the logits. Shape:
        `(batch_size, 1)`.
    """
    hidden = self._hidden_layer(inputs)
    logits = self._output_layer(hidden)

    left_prob = tf.nn.sigmoid(logits)
    action_probs = tf.concat([left_prob, 1.0 - left_prob], 1)

    self._grad_fn = eager.implicit_gradients(
        self._get_cross_entropy_and_save_actions)

    actions = tf.multinomial(tf.log(action_probs), 1)
    return logits, actions
Example #9
 def multinomial_squeeze(logits, temperature=1.0):
   logits_shape = common_layers.shape_list(logits)
   reshaped_logits = (
       tf.reshape(logits, [-1, logits_shape[-1]]) / temperature)
   choices = tf.multinomial(reshaped_logits, 1)
   choices = tf.reshape(choices, logits_shape[:-1])
   return choices
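The same pattern without the tensor2tensor shape helper, as a self-contained sketch with hypothetical shapes:

import tensorflow as tf

# Flatten all leading dimensions, sample one id per row, then restore the
# original leading shape; tf.multinomial only accepts rank-2 logits.
logits = tf.placeholder(tf.float32, [None, 10, 256])   # [batch, length, vocab]
temperature = 0.7
flat = tf.reshape(logits, [-1, tf.shape(logits)[-1]]) / temperature
choices = tf.multinomial(flat, 1)                      # [batch * length, 1]
choices = tf.reshape(choices, tf.shape(logits)[:-1])   # [batch, length]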
Example #10
  def generate_string(self, initial_logits, initial_state, sequence_length):
    """Builds sub-graph to generate a string, sampled from the model.

    Args:
      initial_logits: Starting logits to sample from.
      initial_state: Starting state for the RNN core.
      sequence_length: Number of characters to sample.

    Returns:
      A Tensor of characters, with dimensions `[sequence_length, batch_size,
      output_size]`.
    """

    current_logits = initial_logits
    current_state = initial_state

    generated_letters = []
    for _ in range(sequence_length):
      # Sample a character index from distribution.
      char_index = tf.squeeze(tf.multinomial(current_logits, 1))
      char_one_hot = tf.one_hot(char_index, self._output_size, 1.0, 0.0)
      generated_letters.append(char_one_hot)

      # Feed character back into the deep_lstm.
      gen_out_seq, current_state = self._core(
          tf.nn.relu(self._embed_module(char_one_hot)),
          current_state)
      current_logits = self._output_module(gen_out_seq)

    generated_string = tf.stack(generated_letters)

    return generated_string
Example #11
    def create_dc_actor_critic(self, h_size, num_layers):
        num_streams = 1
        hidden_streams = self.create_new_obs(num_streams, h_size, num_layers)
        hidden = hidden_streams[0]

        if self.use_recurrent:
            tf.Variable(self.m_size, name="memory_size", trainable=False, dtype=tf.int32)
            self.prev_action = tf.placeholder(shape=[None], dtype=tf.int32, name='prev_action')
            self.prev_action_oh = c_layers.one_hot_encoding(self.prev_action, self.a_size)
            hidden = tf.concat([hidden, self.prev_action_oh], axis=1)

            self.memory_in = tf.placeholder(shape=[None, self.m_size], dtype=tf.float32, name='recurrent_in')
            hidden, self.memory_out = self.create_recurrent_encoder(hidden, self.memory_in)
            self.memory_out = tf.identity(self.memory_out, name='recurrent_out')

        self.policy = tf.layers.dense(hidden, self.a_size, activation=None, use_bias=False,
                                      kernel_initializer=c_layers.variance_scaling_initializer(factor=0.01))

        self.all_probs = tf.nn.softmax(self.policy, name="action_probs")
        self.output = tf.multinomial(self.policy, 1)
        self.output = tf.identity(self.output, name="action")

        self.value = tf.layers.dense(hidden, 1, activation=None)
        self.value = tf.identity(self.value, name="value_estimate")
        self.entropy = -tf.reduce_sum(self.all_probs * tf.log(self.all_probs + 1e-10), axis=1)
        self.action_holder = tf.placeholder(shape=[None], dtype=tf.int32)
        self.selected_actions = c_layers.one_hot_encoding(self.action_holder, self.a_size)

        self.all_old_probs = tf.placeholder(shape=[None, self.a_size], dtype=tf.float32, name='old_probabilities')

        # We reshape these tensors to [batch x 1] in order to be of the same rank as continuous control probabilities.
        self.probs = tf.expand_dims(tf.reduce_sum(self.all_probs * self.selected_actions, axis=1), 1)
        self.old_probs = tf.expand_dims(tf.reduce_sum(self.all_old_probs * self.selected_actions, axis=1), 1)
Example #12
 def decoder_fn(time, cell_state, cell_input, cell_output, context_state):
     with tf.name_scope(name, "simple_decoder_fn_inference",
                        [time, cell_state, cell_input, cell_output,
                         context_state]):
         if cell_input is not None:
             raise ValueError("Expected cell_input to be None, but saw: %s" %
                              cell_input)
         if cell_output is None:
             # invariant that this is time == 0
             next_input_id = tf.ones([batch_size], dtype=dtype) * (
                 start_of_sequence_id)
             done = tf.zeros([batch_size], dtype=tf.bool)
             cell_state = encoder_state
             cell_output = tf.zeros([cell_size],
                                    dtype=tf.float32)
         else:
             softmax_output = output_fn(cell_output)
             if sample:
                 next_input_id = tf.squeeze(tf.multinomial(softmax_output, 1), 1)
             else:
                 next_input_id = tf.argmax(softmax_output, 1)
             next_input_id = tf.cast(next_input_id, dtype=dtype)
             done = tf.equal(next_input_id, end_of_sequence_id)
         next_input = tf.gather(embeddings, next_input_id)
         # if time > maxlen, return all true vector
         done = tf.cond(
             tf.greater(time, maximum_length),
             lambda: tf.ones([batch_size], dtype=tf.bool),
             lambda: done)
         return (done, cell_state, next_input, next_input_id, context_state)
Example #13
    def __init__(self, q_values, observations, num_actions, stochastic, eps,
                 softmax, softmax_temp):
        if softmax:
            action_dist = Categorical(q_values / softmax_temp)
            self.action = action_dist.sample()
            self.action_prob = action_dist.sampled_action_prob()
            return

        deterministic_actions = tf.argmax(q_values, axis=1)
        batch_size = tf.shape(observations)[0]

        # Special case masked out actions (q_value ~= -inf) so that we don't
        # even consider them for exploration.
        random_valid_action_logits = tf.where(
            tf.equal(q_values, tf.float32.min),
            tf.ones_like(q_values) * tf.float32.min, tf.ones_like(q_values))
        random_actions = tf.squeeze(
            tf.multinomial(random_valid_action_logits, 1), axis=1)

        chose_random = tf.random_uniform(
            tf.stack([batch_size]), minval=0, maxval=1, dtype=tf.float32) < eps
        stochastic_actions = tf.where(chose_random, random_actions,
                                      deterministic_actions)
        self.action = tf.cond(stochastic, lambda: stochastic_actions,
                              lambda: deterministic_actions)
        self.action_prob = None
Example #14
 def _sample_single(args):
   logits, n_draw = args[0], args[1]  # [K], []
   x = tf.multinomial(logits[tf.newaxis, ...], n_draw,
                      seed)  # [1, n*n_draw]
   x = tf.reshape(x, shape=[n, -1])  # [n, n_draw]
   x = tf.reduce_sum(tf.one_hot(x, depth=k), axis=-2)  # [n, k]
   return x
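What the helper builds, shown as a self-contained sketch with concrete hypothetical sizes: draw n_draw class indices and sum their one-hot encodings to obtain one sample of per-class counts.

import tensorflow as tf

k = 4                                          # number of classes
n_draw = 10                                    # total count of the multinomial draw
logits = tf.constant([[0.0, 1.0, 2.0, 3.0]])   # [1, k] unnormalized log-probabilities
ids = tf.multinomial(logits, n_draw)           # [1, n_draw] sampled class indices
counts = tf.reduce_sum(tf.one_hot(ids, depth=k), axis=-2)  # [1, k] per-class counts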
Example #15
    def sample(self, projected_output):
        """Return integer ID tensor representing the sampled word.
        
        Args:
            projected_output: Tensor [1, 1, state_size], representing a single
                decoder timestep output. 
        """
        # TODO: We really need a tf.control_dependencies check here (for rank).
        with tf.name_scope('decoder_sampler', values=[projected_output]):

            # Protect against extra size-1 dimensions; grab the 1D tensor
            # of size state_size.
            logits = tf.squeeze(projected_output)
            if self.temperature < 0.02:
                return tf.argmax(logits, axis=0)

            # Convert logits to probability distribution.
            probabilities = tf.div(logits, self.temperature)
            projected_output = tf.div(
                tf.exp(probabilities),
                tf.reduce_sum(tf.exp(probabilities), axis=-1))

            # Sample 1 time from the probability distribution.
            sample_ID = tf.squeeze(
                tf.multinomial(tf.expand_dims(probabilities, 0), 1))
        return sample_ID
Example #16
 def testNegativeMinLogits(self):
   tf.set_random_seed(78844)
   with self.test_session(use_gpu=self.use_gpu):
     logits = tf.constant([[np.finfo(np.float32).min] * 1023 + [0]])
     num_samples = 1000
     samples = tf.multinomial(logits, num_samples).eval()
     self.assertAllEqual([[1023] * num_samples], samples)
Example #17
    def build_generator(self):
        """
        Generator for generating captions
        Support sample max or sample from distribution
        No Beam search here; beam search is in decoder
        """
        # Variables for the sample setting
        self.sample_max = tf.Variable(True, trainable = False, name = "sample_max")
        self.sample_temperature = tf.Variable(1.0, trainable = False, name = "temperature")

        self.generator = []
        with tf.variable_scope("rnnlm"):
            flattened_ctx = tf.reshape(self.context, [self.batch_size, 196, 512])
            ctx_mean = tf.reduce_mean(flattened_ctx, 1)

            tf.get_variable_scope().reuse_variables()

            initial_state = utils.get_initial_state(ctx_mean, self.cell.state_size)

            #projected context
            # This is used in attention module; do this outside the loop to reduce redundant computations
            # with tf.variable_scope("attention"):
            if self.att_hid_size == 0:
                pctx = slim.fully_connected(flattened_ctx, 1, activation_fn = None, scope = 'ctx_att') # (batch) * 196 * 1
            else:
                pctx = slim.fully_connected(flattened_ctx, self.att_hid_size, activation_fn = None, scope = 'ctx_att') # (batch) * 196 * att_hid_size

            rnn_input = tf.nn.embedding_lookup(self.Wemb, tf.zeros([self.batch_size], tf.int32))

            prev_h = utils.last_hidden_vec(initial_state)

            self.g_alphas = []
            outputs = []
            state = initial_state
            for ind in range(MAX_STEPS):

                with tf.variable_scope("attention"):
                    alpha = self.get_alpha(prev_h, pctx)
                    self.g_alphas.append(alpha)
                    weighted_context = tf.reduce_sum(flattened_ctx * tf.expand_dims(alpha, 2), 1)

                output, state = self.cell(tf.concat(axis=1, values=[weighted_context, rnn_input]), state)
                outputs.append(output)
                prev_h = output

                # Get the input of next timestep
                prev_logit = slim.fully_connected(prev_h, self.vocab_size + 1, activation_fn = None, scope = 'logit')
                prev_symbol = tf.stop_gradient(tf.cond(self.sample_max,
                    lambda: tf.argmax(prev_logit, 1), # pick the word with largest probability as the input of next time step
                    lambda: tf.squeeze(
                        tf.multinomial(tf.nn.log_softmax(prev_logit) / self.sample_temperature, 1), 1))) # Sample from the distribution
                self.generator.append(prev_symbol)
                rnn_input = tf.nn.embedding_lookup(self.Wemb, prev_symbol)
            
            self.g_output = output = tf.reshape(tf.concat(axis=1, values=outputs), [-1, self.rnn_size]) # outputs[1:], because we don't calculate loss on time 0.
            self.g_logits = logits = slim.fully_connected(output, self.vocab_size + 1, activation_fn = None, scope = 'logit')
            self.g_probs = probs = tf.reshape(tf.nn.softmax(logits), [self.batch_size, MAX_STEPS, self.vocab_size + 1])

        self.generator = tf.transpose(tf.reshape(tf.concat(axis=0, values=self.generator), [MAX_STEPS, -1]))
Example #18
File: rnn_gan_lm.py  Project: igul222/nn
def st_sampler(logits):
    """straight-through stochastic sampler"""
    flat_samples = tf.reshape(tf.multinomial(tf.reshape(logits, [-1, len(charmap)]), 1), [-1])
    onehot = tf.reshape(tf.one_hot(flat_samples, len(charmap)), tf.shape(logits))

    residual = onehot - logits
    onehot = logits + tf.stop_gradient(residual)
    return onehot
Example #19
def multinomial_sample(x, vocab_size, temperature):
  """Multinomial sampling from a n-dimensional tensor."""
  if temperature > 0:
    samples = tf.multinomial(tf.reshape(x, [-1, vocab_size]) / temperature, 1)
  else:
    samples = tf.argmax(x, axis=-1)
  reshaped_samples = tf.reshape(samples, common_layers.shape_list(x)[:-1])
  return tf.to_int32(reshaped_samples)
Example #20
 def _g_recurrence_2(i, x_t, h_tm1, given_num, gen_x):
     h_t = self.g_recurrent_unit(x_t, h_tm1)  # hidden_memory_tuple
     o_t = self.g_output_unit(h_t)  # batch x vocab , logits not prob
     log_prob = tf.log(tf.nn.softmax(o_t))
     next_token = tf.cast(tf.reshape(tf.multinomial(log_prob, 1), [self.batch_size]), tf.int32)
     x_tp1 = tf.nn.embedding_lookup(self.g_embeddings, next_token)  # batch x emb_dim
     gen_x = gen_x.write(i, next_token)  # indices, batch_size
     return i + 1, x_tp1, h_t, given_num, gen_x
Example #21
 def sample_from_logits(logits):
   with tf.control_dependencies([tf.assert_greater(temperature, 0.0)]):
     logits = tf.identity(logits)
   reshaped_logits = (
       tf.reshape(logits, [-1, tf.shape(logits)[-1]]) / temperature)
   choices = tf.multinomial(reshaped_logits, 1)
   choices = tf.reshape(choices,
                        tf.shape(logits)[:logits.get_shape().ndims - 1])
   return choices
Example #22
 def provide_one_hot_labels(self, batch_size):
   """Provides one hot labels."""
   pitch_counts = self.get_pitch_counts()
   pitches = sorted(pitch_counts.keys())
   counts = [pitch_counts[p] for p in pitches]
   indices = tf.reshape(
       tf.multinomial(tf.log([tf.to_float(counts)]), batch_size), [batch_size])
   one_hot_labels = tf.one_hot(indices, depth=len(pitches))
   return one_hot_labels
Example #23
 def loop(prev, i):
     with tf.variable_scope(rnnlm_scope):
         prev = slim.fully_connected(prev, self.vocab_size + 1, activation_fn = None, scope = 'logit')                
         prev_symbol = tf.stop_gradient(tf.cond(self.sample_max,
             lambda: tf.argmax(prev, 1), # pick the word with largest probability as the input of next time step
             lambda: tf.squeeze(
                 tf.multinomial(tf.nn.log_softmax(prev) / self.sample_temperature, 1), 1))) # Sample from the distribution
         self.generator.append(prev_symbol)
         return tf.nn.embedding_lookup(self.Wemb, prev_symbol)
Example #24
File: model.py  Project: Soledad89/SeqGAN
 def _pretrain_recurrence(i, x_t, h_tm1, g_predictions):
     h_t = self.g_recurrent_unit(x_t, h_tm1)
     o_t = self.g_output_unit(h_t)
     log_prob = tf.log(tf.nn.softmax(o_t))
     next_token = tf.cast(tf.reshape(tf.multinomial(log_prob, 1), [self.batch_size]), tf.int32)
     g_predictions = g_predictions.write(i, tf.nn.softmax(o_t))  # batch x vocab_size
     x_tp1 = tf.cond(tf.less(tf.constant(random.random()), self.curriculum_rate), lambda: ta_emb_x.read(i),
                     lambda: tf.nn.embedding_lookup(self.g_embeddings, next_token))
     return i + 1, x_tp1, h_t, g_predictions
Example #25
 def testLargeLogits(self):
   for neg in [True, False]:
     with self.test_session(use_gpu=self.use_gpu):
       logits = np.array([[1000.] * 5])
       if neg:
         logits *= -1
       samples = tf.multinomial(logits, 10).eval()
     # Sampled classes should be in-range.
     self.assertTrue((samples >= 0).all())
     self.assertTrue((samples < 5).all())
Example #26
def create_model(seq, temp, vocab, hidden=HIDDEN_SIZE):
    seq = tf.one_hot(seq, len(vocab))
    output, in_state, out_state = create_rnn(seq, hidden)
    # fully_connected is syntactic sugar for tf.matmul(w, output) + b
    # it will create w and b for us
    logits = tf.contrib.layers.fully_connected(output, len(vocab), None)
    loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=logits[:, :-1], labels=seq[:, 1:]))
    # sample the next word from Maxwell-Boltzmann Distribution with temperature temp
    sample = tf.multinomial(tf.exp(logits[:, -1] / temp), 1)[:, 0]
    return loss, sample, in_state, out_state
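tf.multinomial already treats its input as unnormalized log-probabilities, so the sampling line is often written without the tf.exp; a sketch of that form with hypothetical shapes:

import tensorflow as tf

logits = tf.placeholder(tf.float32, [None, 20, 65])   # [batch, time, vocab]
temp = tf.placeholder_with_default(1.0, [])
# Divide the raw logits of the last step by the temperature and sample directly.
sample = tf.multinomial(logits[:, -1] / temp, 1)[:, 0]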
Example #27
 def _g_recurrence_temperature(i, x_t, h_tm1, gen_o_temp, gen_x_temp, alpha):
     h_t = self.g_recurrent_unit(x_t, h_tm1)  # hidden_memory_tuple
     o_t = self.g_output_unit(h_t)/alpha  # batch x vocab , logits not prob
     log_prob = tf.log(tf.nn.softmax(o_t))
     next_token = tf.cast(tf.reshape(tf.multinomial(log_prob, 1), [self.batch_size]), tf.int32)
     x_tp1 = tf.nn.embedding_lookup(self.g_embeddings, next_token)  # batch x emb_dim
     gen_o_temp = gen_o_temp.write(i, tf.reduce_sum(tf.multiply(tf.one_hot(next_token, self.num_vocabulary, 1.0, 0.0),
                                                                tf.nn.softmax(o_t)), 1))  # [batch_size] , prob
     gen_x_temp = gen_x_temp.write(i, next_token)  # indices, batch_size
     return i + 1, x_tp1, h_t, gen_o_temp, gen_x_temp, alpha
Example #28
 def body(past, prev, output):
     next_outputs = step(hparams, prev[:, tf.newaxis], past=past)
     logits = next_outputs['logits'][:, -1, :]  / tf.to_float(temperature)
     logits = top_k_logits(logits, k=top_k)
     samples = tf.multinomial(logits, num_samples=1, output_dtype=tf.int32)
     return [
         tf.concat([past, next_outputs['presents']], axis=-2),
         tf.squeeze(samples, axis=[1]),
         tf.concat([output, samples], axis=1),
     ]
Example #29
 def create_model(self):
     seq = tf.one_hot(self.seq, len(self.vocab))
     self.create_rnn(seq)
     self.logits = tf.layers.dense(self.output, len(self.vocab), None)
     loss = tf.nn.softmax_cross_entropy_with_logits(logits=self.logits[:, :-1], 
                                                     labels=seq[:, 1:])
     self.loss = tf.reduce_sum(loss)
     # sample the next character from Maxwell-Boltzmann Distribution 
     # with temperature temp. It works equally well without tf.exp
     self.sample = tf.multinomial(tf.exp(self.logits[:, -1] / self.temp), 1)[:, 0] 
     self.opt = tf.train.AdamOptimizer(self.lr).minimize(self.loss, global_step=self.gstep)
Example #30
 def __init__(self, dim):
     self._dim = dim
     weights_var = tf.placeholder(
         dtype=tf.float32,
         shape=(None, dim),
         name="weights"
     )
     self._f_sample = tensor_utils.compile_function(
         inputs=[weights_var],
         outputs=tf.multinomial(weights_var, num_samples=1)[:, 0],
     )
Example #31
    def create_variables(self):
        with tf.name_scope("model_inputs"):
            self.states = tf.placeholder(dtype=tf.float32,
                                         shape=(None, self.state_dim),
                                         name="states")

        # rollout action based on current policy
        with tf.name_scope("predict_actions"):
            # initialize actor-critic network
            with tf.variable_scope("actor_network"):
                self.policy_outputs = self.actor_network(self.states)
            with tf.variable_scope("critic_network"):
                self.value_outputs = self.critic_network(self.states)

            # predict actions from policy network
            self.action_scores = tf.identity(self.policy_outputs,
                                             name="action_scores")
            self.predicted_actions = tf.multinomial(self.action_scores, 1)

        # get variable list
        actor_network_variables = tf.get_collection(
            tf.GraphKeys.TRAINABLE_VARIABLES, scope="actor_network")
        critic_network_variables = tf.get_collection(
            tf.GraphKeys.TRAINABLE_VARIABLES, scope="critic_network")

        # compute loss and gradients
        with tf.name_scope("compute_pg_gradients"):
            # gradients for selecting action from policy network
            self.taken_actions = tf.placeholder(dtype=tf.int32,
                                                shape=(None, ),
                                                name="taken_actions")
            self.discounted_rewards = tf.placeholder(dtype=tf.float32,
                                                     shape=(None, ),
                                                     name="discounted_rewards")

            with tf.variable_scope("actor_network", reuse=True):
                self.logprobs = self.actor_network(self.states)

            with tf.variable_scope("critic_network", reuse=True):
                self.estimated_values = self.critic_network(self.states)

            # compute policy loss and regularization loss
            self.cross_entropy_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
                logits=self.logprobs, labels=self.taken_actions)
            self.pg_loss = tf.reduce_mean(self.cross_entropy_loss)
            self.actor_reg_loss = tf.reduce_sum(
                [tf.reduce_sum(tf.square(x)) for x in actor_network_variables])
            self.actor_loss = self.pg_loss + self.reg_param * self.actor_reg_loss

            # compute actor gradients
            self.actor_gradients = self.optimizer.compute_gradients(
                self.actor_loss, actor_network_variables)
            # compute advantages A(s) = R - V(s)
            self.advantages = tf.reduce_sum(self.discounted_rewards -
                                            self.estimated_values)
            # compute policy gradients
            for i, (grad, var) in enumerate(self.actor_gradients):
                if grad is not None:
                    self.actor_gradients[i] = (grad * self.advantages, var)

            # compute critic gradients
            self.mean_square_loss = tf.reduce_mean(
                tf.square(self.discounted_rewards - self.estimated_values))
            self.critic_reg_loss = tf.reduce_sum([
                tf.reduce_sum(tf.square(x)) for x in critic_network_variables
            ])
            self.critic_loss = self.mean_square_loss + self.reg_param * self.critic_reg_loss
            self.critic_gradients = self.optimizer.compute_gradients(
                self.critic_loss, critic_network_variables)

            # collect all gradients
            self.gradients = self.actor_gradients + self.critic_gradients

            # gradients clipping by norm
            for i, (grad, var) in enumerate(self.gradients):
                if grad is not None:
                    self.gradients[i] = (tf.clip_by_norm(
                        grad, self.max_gradient), var)

            # summarize gradients
            for grad, var in self.gradients:
                tf.summary.histogram(var.name, var)
                if grad is not None:
                    tf.summary.histogram(var.name + '/gradients', grad)

            # emit summaries
            tf.summary.histogram("estimated_values", self.estimated_values)
            tf.summary.scalar("actor_loss", self.actor_loss)
            tf.summary.scalar("critic_loss", self.critic_loss)
            tf.summary.scalar("reg_loss",
                              self.actor_reg_loss + self.critic_reg_loss)

        # training update
        with tf.name_scope("train_actor_critic"):
            # apply gradients to update actor network
            self.train_op = self.optimizer.apply_gradients(self.gradients)

        self.summarize = tf.summary.merge_all()
        self.no_op = tf.no_op()
Example #32
    def build_policy_network_op(self, scope="policy_network"):
        """
    Build the policy network, construct the tensorflow operation to sample
    actions from the policy network outputs, and compute the log probabilities
    of the actions taken (for computing the loss later). These operations are
    stored in self.sampled_action and self.logprob. Must handle both settings
    of self.discrete.

    Args:
            scope: the scope of the neural network

    TODO:
    Discrete case:
        action_logits: the logits for each action
            HINT: use build_mlp, check self.config for layer_size and
            n_layers
        self.sampled_action: sample from these logits
            HINT: use tf.multinomial + tf.squeeze
        self.logprob: compute the log probabilities of the taken actions
            HINT: 1. tf.nn.sparse_softmax_cross_entropy_with_logits computes
                     the *negative* log probabilities of labels, given logits.
                  2. taken actions are different than sampled actions!

    Continuous case:
        To build a policy in a continuous action space domain, we will have the
        model output the means of each action dimension, and then sample from
        a multivariate normal distribution with these means and trainable standard
        deviation.

        That is, the action a_t ~ N( mu(o_t), sigma)
        where mu(o_t) is the network that outputs the means for each action
        dimension, and sigma is a trainable variable for the standard deviations.
        N here is a multivariate gaussian distribution with the given parameters.

        action_means: the predicted means for each action dimension.
            HINT: use build_mlp, check self.config for layer_size and
            n_layers
        log_std: a trainable variable for the log standard deviations.
            HINT: think about why we use log std as the trainable variable instead of std
            HINT: use tf.get_variable
            HINT: The shape of this should match the shape of action dimension
        self.sampled_action: sample from the gaussian distribution as described above
            HINT: use tf.random_normal
            HINT: use re-parametrization to obtain N(mu, sigma) from N(0, 1)
        self.lobprob: the log probabilities of the taken actions
            HINT: use tf.contrib.distributions.MultivariateNormalDiag

    """
        #######################################################
        #########   YOUR CODE HERE - 8-12 lines.   ############

        if self.discrete:
            mlp_out = build_mlp(self.observation_placeholder, self.action_dim,
                                scope, self.config.n_layers,
                                self.config.layer_size, self.config.activation)

            self.sampled_action = tf.squeeze(
                tf.multinomial(mlp_out, num_samples=1), 1)

            self.logprob = -tf.nn.sparse_softmax_cross_entropy_with_logits(
                labels=self.action_placeholder, logits=mlp_out)
        else:
            action_means = build_mlp(self.observation_placeholder,
                                     self.action_dim, scope,
                                     self.config.n_layers,
                                     self.config.layer_size)

            log_std = tf.get_variable("log_std",
                                      shape=[1, self.action_dim],
                                      trainable=True)

            # Re-parametrization: a = mu + sigma * eps with eps ~ N(0, I), so the
            # sample stays differentiable w.r.t. action_means and log_std.
            self.sampled_action = action_means + tf.math.exp(log_std) * tf.random_normal(
                tf.shape(action_means))

            mvn = tf.contrib.distributions.MultivariateNormalDiag(
                action_means, tf.math.exp(log_std))

            self.lobprob = mvn.log_prob(self.action_placeholder)
Example #33
        def _body(layer_id, inputs, prev_c, prev_h, anchors, anchors_w_1,
                  arc_seq, entropy, log_prob):
            indices = tf.range(0, layer_id, dtype=tf.int32)
            start_id = 4 * (layer_id - 2)
            prev_layers = []
            for i in range(2):  # index_1, index_2
                next_c, next_h = stack_lstm(inputs, prev_c, prev_h,
                                            self.w_lstm)
                prev_c, prev_h = next_c, next_h
                query = anchors_w_1.gather(indices)
                query = tf.reshape(query, [layer_id, self.lstm_size])
                query = tf.tanh(query + tf.matmul(next_h[-1], self.w_attn_2))
                query = tf.matmul(query, self.v_attn)
                logits = tf.reshape(query, [1, layer_id])
                if self.temperature is not None:
                    logits /= self.temperature
                if self.tanh_constant is not None:
                    logits = self.tanh_constant * tf.tanh(logits)
                index = tf.multinomial(logits, 1)
                index = tf.to_int32(index)
                index = tf.reshape(index, [1])
                arc_seq = arc_seq.write(start_id + 2 * i, index)
                curr_log_prob = tf.nn.sparse_softmax_cross_entropy_with_logits(
                    logits=logits, labels=index)
                log_prob += curr_log_prob
                curr_ent = tf.stop_gradient(
                    tf.nn.softmax_cross_entropy_with_logits(
                        logits=logits, labels=tf.nn.softmax(logits)))
                entropy += curr_ent
                prev_layers.append(anchors.read(tf.reduce_sum(index)))
                inputs = prev_layers[-1]

            for i in range(2):  # op_1, op_2
                next_c, next_h = stack_lstm(inputs, prev_c, prev_h,
                                            self.w_lstm)
                prev_c, prev_h = next_c, next_h
                logits = tf.matmul(next_h[-1], self.w_soft) + self.b_soft
                if self.temperature is not None:
                    logits /= self.temperature
                if self.tanh_constant is not None:
                    op_tanh = self.tanh_constant / self.op_tanh_reduce
                    logits = op_tanh * tf.tanh(logits)
                if use_bias:
                    logits += self.b_soft_no_learn
                op_id = tf.multinomial(logits, 1)
                op_id = tf.to_int32(op_id)
                op_id = tf.reshape(op_id, [1])
                arc_seq = arc_seq.write(start_id + 2 * i + 1, op_id)
                curr_log_prob = tf.nn.sparse_softmax_cross_entropy_with_logits(
                    logits=logits, labels=op_id)
                log_prob += curr_log_prob
                curr_ent = tf.stop_gradient(
                    tf.nn.softmax_cross_entropy_with_logits(
                        logits=logits, labels=tf.nn.softmax(logits)))
                entropy += curr_ent
                inputs = tf.nn.embedding_lookup(self.w_emb, op_id)

            next_c, next_h = stack_lstm(inputs, prev_c, prev_h, self.w_lstm)
            anchors = anchors.write(layer_id, next_h[-1])
            anchors_w_1 = anchors_w_1.write(
                layer_id, tf.matmul(next_h[-1], self.w_attn_1))
            inputs = self.g_emb

            return (layer_id + 1, inputs, next_c, next_h, anchors, anchors_w_1,
                    arc_seq, entropy, log_prob)
Example #34
    def __init__(self, state_shape, n_actions, n_hidden, summary=True):
        super(ActorCriticNetworkDiscreteCNNRNN, self).__init__()
        self.state_shape = state_shape
        self.n_actions = n_actions
        self.n_hidden = n_hidden
        self.summary = summary

        self.states = tf.placeholder(tf.float32, [None] + state_shape,
                                     name="states")
        self.adv = tf.placeholder(tf.float32, name="advantage")
        self.actions_taken = tf.placeholder(tf.float32, name="actions_taken")
        self.r = tf.placeholder(tf.float32, [None], name="r")

        x = self.states
        # Convolution layers
        for i in range(4):
            x = tf.nn.elu(conv2d(x, 32, "l{}".format(i + 1), [3, 3], [2, 2]))

        # Flatten
        reshape = tf.expand_dims(flatten(x), [0])

        lstm_size = 256
        self.enc_cell = tf.contrib.rnn.BasicLSTMCell(lstm_size)
        lstm_state_size = self.enc_cell.state_size
        c_init = np.zeros((1, lstm_state_size.c), np.float32)
        h_init = np.zeros((1, lstm_state_size.h), np.float32)
        self.state_init = [c_init, h_init]
        self.rnn_state_in = self.enc_cell.zero_state(1, tf.float32)
        tf.add_to_collection("rnn_state_in_c", self.rnn_state_in.c)
        tf.add_to_collection("rnn_state_in_h", self.rnn_state_in.h)
        L3, self.rnn_state_out = tf.nn.dynamic_rnn(
            cell=self.enc_cell,
            inputs=reshape,
            initial_state=self.rnn_state_in,
            dtype=tf.float32)
        tf.add_to_collection("rnn_state_out_c", self.rnn_state_out.c)
        tf.add_to_collection("rnn_state_out_h", self.rnn_state_out.h)
        L3 = tf.reshape(L3, [-1, lstm_size])

        # Fully connected for Actor

        self.logits = linear(L3, n_actions, "actionlogits",
                             normalized_columns_initializer(0.01))
        self.value = tf.reshape(
            linear(L3, 1, "value", normalized_columns_initializer(1.0)), [-1])

        self.probs = tf.nn.softmax(self.logits)

        self.action = tf.squeeze(tf.multinomial(
            self.logits - tf.reduce_max(self.logits, [1], keep_dims=True), 1),
                                 [1],
                                 name="action")
        self.action = tf.one_hot(self.action, n_actions)[0, :]

        log_probs = tf.nn.log_softmax(self.logits)
        self.actor_loss = -tf.reduce_sum(
            tf.reduce_sum(log_probs * self.actions_taken, [1]) * self.adv)

        self.critic_loss = 0.5 * tf.reduce_sum(tf.square(self.value - self.r))

        self.entropy = -tf.reduce_sum(self.probs * log_probs)

        self.loss = self.actor_loss + 0.5 * self.critic_loss - self.entropy * 0.01

        self.vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                      tf.get_variable_scope().name)
Example #35
def sampling_typeless_SNIS_rs(p, parten, nodes_nbrs, nbr_segment,
                              edge_features, num_sample, n_node_type,
                              edge_type_array):
    unique_nbrs = tf.unique_with_counts(nbr_segment)
    num_nbrs = tf.size(unique_nbrs.y)

    q = tf.gather(
        tf.ones(num_nbrs) / tf.cast(num_nbrs, dtype=tf.float32),
        unique_nbrs.idx)

    samples = tf.unique(
        tf.cast(tf.multinomial(tf.log([q]), num_sample)[0], tf.int32)).y

    infos = tf.sparse_to_dense(tf.reshape(tf.contrib.framework.sort(samples),
                                          [-1, 1]),
                               output_shape=tf.shape(unique_nbrs.idx),
                               sparse_values=tf.ones_like(samples,
                                                          dtype=tf.int32))

    partitions = tf.gather(infos, unique_nbrs.idx)

    samples_to_gather = tf.dynamic_partition(
        tf.range(tf.size(partitions), dtype=tf.int32), partitions, 2)[1]

    sampled_p = tf.gather(p, samples_to_gather)
    sampled_q = tf.gather(tf.gather(q, unique_nbrs.idx), samples_to_gather)

    sampled_parten = tf.gather(parten, samples_to_gather)
    sampled_nbrs = tf.gather(nodes_nbrs, samples_to_gather)

    nbrset = tf.dynamic_partition(sampled_nbrs, sampled_parten, n_node_type)
    segset = tf.dynamic_partition(tf.gather(nbr_segment, samples_to_gather),
                                  sampled_parten, n_node_type)

    edge_f_set = []
    feature_ids = tf.dynamic_partition(
        tf.gather(tf.range(tf.size(nbr_segment)), samples_to_gather),
        sampled_parten, n_node_type)
    for i in range(n_node_type):
        edge_f_set.append(tf.gather(edge_features, feature_ids[i]))

    sampled_ps = tf.dynamic_partition(sampled_p, sampled_parten, n_node_type)
    sampled_qs = tf.dynamic_partition(sampled_q, sampled_parten, n_node_type)

    condition2 = [
        tf.reduce_all(tf.math.greater(tf.size(nbrset[i]), 0))
        for i in range(n_node_type)
    ]
    all_weight = []
    for i in range(n_node_type):
        weights = tf.cond(
            condition2[i],
            false_fn=lambda: [tf.zeros(0)],
            true_fn=lambda: calculate_pq_SNIS(segset[i], nbrset[i], edge_f_set[
                i], sampled_ps[i], sampled_qs[i]))
        all_weight.append(weights)
        print(edge_type_array[i])
        edge_type_array[i] = tf.cond(
            condition2[i],
            true_fn=lambda: edge_type_array[i],
            false_fn=lambda: -tf.ones(tf.size(edge_type_array[i]), tf.int32))
    num_sampled_edges = tf.size(samples_to_gather)
    num_sampled_nbrs = tf.size(samples)

    return [
        all_weight, num_sampled_edges, num_sampled_nbrs, nbrset, segset,
        edge_f_set, edge_type_array
    ]
Example #36
    def __init__(self, env):
        self.env = env

        num_actions = self.env.action_space.n

        # we have three place holders we'll use...
        # observations; used either during rollout to sample some actions, or
        # during training when combined with actions_taken and advantages.
        shape_with_batch = [None] + list(self.env.observation_space.shape)
        self.observations = tf.placeholder(shape=shape_with_batch,
                                           dtype=tf.float32)
        # the actions we took during rollout
        self.actions = tf.placeholder(tf.int32, name='actions')
        # the advantages we got from taken 'action_taken' in 'observation'
        self.advantages = tf.placeholder(tf.float32, name='advantages')

        # our model is a very simple MLP
        with tf.variable_scope("model"):
            # stack of hidden layers on flattened input; (batch,2,2,7) -> (batch,28)
            flat_input_state = slim.flatten(self.observations, scope='flat')
            final_hidden = self.hidden_layers_starting_at(
                flat_input_state, opts.hidden_layers)
            logits = slim.fully_connected(inputs=final_hidden,
                                          num_outputs=num_actions,
                                          activation_fn=None)

        # in the eval case just pick arg max
        self.action_argmax = tf.argmax(logits, 1)

        # for rollouts we need an op that samples actions from this
        # model to give a stochastic action.
        sample_action = tf.multinomial(logits, num_samples=1)
        self.sampled_action_op = tf.reshape(sample_action, shape=[])

        # we are trying to maximise the product of two components...
        # 1) the log_p of "good" actions.
        # 2) the advantage term based on the rewards from actions.

        # first we need the log_p values for each observation for the actions we specifically
        # took by sampling... we first run a log_softmax over the action logits to get
        # probabilities.
        log_softmax = tf.nn.log_softmax(logits)
        self.debug_softmax = tf.exp(log_softmax)

        # we then use a mask to only select the elements of the softmaxs that correspond
        # to the actions we actually took. we could also do this by complex indexing and a
        # gather but i always think this is more natural. the "cost" of dealing with the
        # mostly zero one hot, as opposed to doing a gather on sparse indexes, isn't a big
        # deal when the number of observations is >> number of actions.
        action_mask = tf.one_hot(indices=self.actions, depth=num_actions)
        action_log_prob = tf.reduce_sum(log_softmax * action_mask,
                                        reduction_indices=1)

        # the (element wise) product of these action log_p's with the total reward of the
        # episode represents the quantity we want to maximise. we standardise the advantage
        # values so roughly 1/2 +ve / -ve as a variance control.
        action_mul_advantages = tf.mul(action_log_prob,
                                       util.standardise(self.advantages))
        self.loss = -tf.reduce_sum(
            action_mul_advantages)  # recall: we are maximising.
        with tf.variable_scope("optimiser"):
            # dynamically create optimiser based on opts
            optimiser = util.construct_optimiser(opts)
            # calc gradients
            gradients = optimiser.compute_gradients(self.loss)
            # potentially clip and wrap with debugging tf.Print
            gradients = util.clip_and_debug_gradients(gradients, opts)
            # apply
            self.train_op = optimiser.apply_gradients(gradients)
Example #37
    def __init__(self, lr, brain, h_size, epsilon, beta, max_step, normalize,
                 num_layers):
        """
        Creates Discrete Control Actor-Critic model.
        :param brain: Brain parameters defining the state and action spaces
        :param h_size: Hidden layer size
        """
        super(DiscreteControlModel, self).__init__()
        self._create_global_steps()
        self._create_reward_encoder()
        self.normalize = normalize

        hidden_state, hidden_visual, hidden = None, None, None
        if brain.number_observations > 0:
            height_size, width_size = brain.camera_resolutions[0][
                'height'], brain.camera_resolutions[0]['width']
            bw = brain.camera_resolutions[0]['blackAndWhite']
            hidden_visual = self._create_visual_encoder(
                height_size, width_size, bw, h_size, 1, tf.nn.elu,
                num_layers)[0]
        if brain.state_space_size > 0:
            s_size = brain.state_space_size
            if brain.state_space_type == "continuous":
                hidden_state = self._create_continuous_state_encoder(
                    s_size, h_size, 1, tf.nn.elu, num_layers)[0]
            else:
                hidden_state = self._create_discrete_state_encoder(
                    s_size, h_size, 1, tf.nn.elu, num_layers)[0]

        if hidden_visual is None and hidden_state is None:
            raise Exception(
                "No valid network configuration possible. "
                "There are no states or observations in this brain")
        elif hidden_visual is not None and hidden_state is None:
            hidden = hidden_visual
        elif hidden_visual is None and hidden_state is not None:
            hidden = hidden_state
        elif hidden_visual is not None and hidden_state is not None:
            hidden = tf.concat([hidden_visual, hidden_state], axis=1)

        a_size = brain.action_space_size

        self.batch_size = tf.placeholder(shape=None,
                                         dtype=tf.int32,
                                         name='batch_size')
        self.policy = tf.layers.dense(
            hidden,
            a_size,
            activation=None,
            use_bias=False,
            kernel_initializer=c_layers.variance_scaling_initializer(
                factor=0.01))
        self.probs = tf.nn.softmax(self.policy, name="action_probs")
        self.output = tf.multinomial(self.policy, 1)
        self.output_max = tf.argmax(self.probs, name='action_max', axis=1)
        self.output = tf.identity(self.output, name="action")
        self.value = tf.layers.dense(
            hidden,
            1,
            activation=None,
            use_bias=False,
            kernel_initializer=c_layers.variance_scaling_initializer(
                factor=1.0))
        self.value = tf.identity(self.value, name="value_estimate")

        self.entropy = -tf.reduce_sum(self.probs * tf.log(self.probs + 1e-10),
                                      axis=1)

        self.action_holder = tf.placeholder(shape=[None], dtype=tf.int32)
        self.selected_actions = c_layers.one_hot_encoding(
            self.action_holder, a_size)
        self.old_probs = tf.placeholder(shape=[None, a_size],
                                        dtype=tf.float32,
                                        name='old_probabilities')
        self.responsible_probs = tf.reduce_sum(self.probs *
                                               self.selected_actions,
                                               axis=1)
        self.old_responsible_probs = tf.reduce_sum(self.old_probs *
                                                   self.selected_actions,
                                                   axis=1)

        self._create_ppo_optimizer(self.responsible_probs,
                                   self.old_responsible_probs, self.value,
                                   self.entropy, beta, epsilon, lr, max_step)
Example #38
        def body(*args) -> LoopState:

            loop_state = LoopState(*args)
            histories = loop_state.histories
            feedables = loop_state.feedables
            step = feedables.step

            decoded_symbols_ta = histories.decoded_symbols.write(
                step, feedables.input_symbol)

            # shape (time, batch)
            decoded_symbols = decoded_symbols_ta.stack()
            decoded_symbols.set_shape([None, None])
            decoded_symbols_in_batch = tf.transpose(decoded_symbols)

            # mask (time, batch)
            mask = histories.input_mask.stack()
            mask.set_shape([None, None])

            with tf.variable_scope(self._variable_scope, reuse=tf.AUTO_REUSE):
                # shape (batch, time, dimension)
                embedded_inputs = self.embed_inputs(decoded_symbols_in_batch)

                last_layer = self.layer(self.depth, embedded_inputs,
                                        tf.transpose(mask))

                # (batch, state_size)
                output_state = last_layer.temporal_states[:, -1, :]

                # See train_logits definition
                logits = tf.matmul(output_state, self.decoding_w)
                logits += self.decoding_b

                if sample:
                    next_symbols = tf.multinomial(logits, num_samples=1)
                else:
                    next_symbols = tf.to_int32(tf.argmax(logits, axis=1))
                    int_unfinished_mask = tf.to_int32(
                        tf.logical_not(loop_state.feedables.finished))

                    # Note this works only when PAD_TOKEN_INDEX is 0. Otherwise
                    # this have to be rewritten
                    assert PAD_TOKEN_INDEX == 0
                    next_symbols = next_symbols * int_unfinished_mask

                    has_just_finished = tf.equal(next_symbols, END_TOKEN_INDEX)
                    has_finished = tf.logical_or(feedables.finished,
                                                 has_just_finished)
                    not_finished = tf.logical_not(has_finished)

            new_feedables = DecoderFeedables(step=step + 1,
                                             finished=has_finished,
                                             input_symbol=next_symbols,
                                             prev_logits=logits)

            # TransformerHistories is a type and should be callable
            # pylint: disable=not-callable
            new_histories = TransformerHistories(
                logits=histories.logits.write(step, logits),
                decoder_outputs=histories.decoder_outputs.write(
                    step, output_state),
                mask=histories.mask.write(step, not_finished),
                outputs=histories.outputs.write(step, next_symbols),
                # transformer-specific:
                # TODO handle attention histories correctly
                decoded_symbols=decoded_symbols_ta,
                self_attention_histories=histories.self_attention_histories,
                inter_attention_histories=histories.inter_attention_histories,
                input_mask=histories.input_mask.write(
                    step + 1, tf.to_float(not_finished)))
            # pylint: enable=not-callable

            new_loop_state = LoopState(histories=new_histories,
                                       constants=[],
                                       feedables=new_feedables)

            return new_loop_state
Example #39
    def generator(self, name="generator", reuse=False):
        '''
           Caption sampler: words are sampled according to the predicted probability distribution.

        '''
        random_uniform_init = tf.random_uniform_initializer(minval=-0.1, maxval=0.1)
        with tf.variable_scope(name):

            if reuse:
                tf.get_variable_scope().reuse_variables()

            with tf.device("/cpu:0"), tf.variable_scope("word"):
                # name: "gnerator/word"
                word_emb_W = tf.get_variable("word_emb_W", [self.vocab_size, self.G_hidden_size], tf.float32, random_uniform_init)

            with tf.variable_scope("image_feat"):
                # name: "generator/image_feat"
                image_feat_W = tf.get_variable("image_feat_W", [self.image_feat_dim, self.G_hidden_size], tf.float32, random_uniform_init)
                image_feat_b = tf.get_variable("image_feat_b", [self.G_hidden_size], tf.float32, random_uniform_init)

            with tf.variable_scope("output"):
                # name: "generator/output"
                output_W = tf.get_variable("output_W", [self.G_hidden_size, self.vocab_size], tf.float32, random_uniform_init)
                output_b = tf.get_variable("output_b", [self.vocab_size], tf.float32, random_uniform_init)

            with tf.variable_scope("lstm_encoder"):
                if self.rnn_cell == 'lstm':
                    encoder = tf.nn.rnn_cell.LSTMCell(self.G_hidden_size, state_is_tuple=True)
                elif self.rnn_cell == 'gru':
                    encoder = tf.nn.rnn_cell.GRUCell(self.G_hidden_size)

            with tf.variable_scope("lstm_decoder"):
                # variables are NOT created here (the cell builds them lazily on first call)
                if self.rnn_cell == 'lstm':
                    decoder = tf.nn.rnn_cell.LSTMCell(self.G_hidden_size, state_is_tuple=True)
                elif self.rnn_cell == 'gru':
                    decoder = tf.nn.rnn_cell.GRUCell(self.G_hidden_size)

            #============================= encoder ===================================================================
            state = encoder.zero_state(self.batch_size, tf.float32)
            with tf.variable_scope("image_feat") as scope:
                image_feat = self.batch_norm(self.image_feat[:,:], mode='train', name='')
            image_feat_emb = tf.matmul(image_feat, image_feat_W) + image_feat_b  # B,H
            lstm_input = image_feat_emb
            with tf.variable_scope("lstm_encoder") as scope:
                _, state = encoder(lstm_input, state)
                encoder_state = state

            #============================= decoder ===================================================================

            start_token = tf.constant(self.START, tf.int32, [self.batch_size])
            mask = tf.constant(True, "bool", [self.batch_size])

            sample_words = []

            state = encoder_state
            for j in range(self.lstm_steps):
                with tf.device("/cpu:0"):
                    if j == 0:
                        decoder_input = tf.nn.embedding_lookup(word_emb_W, start_token)
                    else:
                        decoder_input = tf.nn.embedding_lookup(word_emb_W, sample_word)

                with tf.variable_scope("lstm"):
                    if not j == 0:
                        tf.get_variable_scope().reuse_variables()
                    output, state = decoder(decoder_input, state)

                    logits = tf.matmul(output, output_W) + output_b
                    log_probs = tf.log(tf.clip_by_value(tf.nn.softmax(logits), 1e-20, 1.0))  # B, vocab_size; clip probabilities to avoid log(0)

                    # sample once from the multinomial distribution
                    # (Monte Carlo sampling)
                    sample_word = tf.reshape(tf.multinomial(log_probs, 1), [self.batch_size])   # num_samples=1: sample once
                    sample_words.append(sample_word)

            return sample_words
Example #40
def categorical_sample(logits, d):
    value = tf.squeeze(
        tf.multinomial(logits - tf.reduce_max(logits, [1], keep_dims=True), 1),
        [1])
    return tf.one_hot(value, d)
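
A minimal usage sketch (hypothetical values). Subtracting the row-wise max changes nothing mathematically, since tf.multinomial normalizes the logits anyway, but it keeps the exponentiation numerically stable:

logits = tf.constant([[1.0, 2.0, 0.5]])           # batch of 1, 3 categories
one_hot_action = categorical_sample(logits, d=3)  # float32 tensor of shape [1, 3]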
Example #41
    def __init__(self, n_inputs, n_outputs, **params):

        super(TDM_agent, self).__init__(**params)

        self.n_inputs = n_inputs
        self.n_outputs = n_outputs

        self.lr = params['agent_params'].pop('lr', 1e-3)
        self.discount = params['agent_params'].pop('discount', 1e-3)
        self.tau = params['agent_params'].pop('tau', 1e-3)
        self.max_td = params['agent_params'].pop('max_td', 0)

        self.soft_learning = params['agent_params'].pop('soft_learning', False)
        self.reward_scale = params['agent_params'].pop('reward_scale', 1.0)
        self.double = params['agent_params'].pop('double', False)
        self.huber_loss = params['agent_params'].pop('huber_loss', True)
        self.clip_gradients = params['agent_params'].pop(
            'clip_gradients', False)
        self.train_steps_per_t = params['agent_params'].pop(
            'train_steps_per_t', 1)
        self.q_train_steps_per_t = params['agent_params'].pop(
            'q_train_steps_per_t', 1)
        self.extra_q_train_steps_per_t = self.q_train_steps_per_t - self.train_steps_per_t
        assert self.extra_q_train_steps_per_t >= 0

        self.multi_step = params['agent_params'].pop('multi_step', False)
        if self.multi_step:
            self.discount = self.discount**self.multi_step

        assert not (self.soft_learning and self.double)

        self._init_placeholders()

        ### QNET
        self.qnet = Qnet(self.obs,
                         self.n_outputs,
                         params['network_spec'],
                         scope='qnet')
        self.model_Q_params = self.qnet.get_params_internal()
        self.model_Q_outputs = self.qnet.outputs

        ### FNET
        self.fnet = Qnet([self.obs, self.actions, self.scaled_tds],
                         self.n_inputs,
                         params['network_spec'],
                         scope='fnet')
        self.model_F_params = self.fnet.get_params_internal()
        self.model_F_outputs = self.fnet.outputs

        ### RNET
        self.rnet = Qnet([self.obs, self.actions, self.scaled_tds],
                         1,
                         params['network_spec'],
                         scope='rnet')
        self.model_R_params = self.rnet.get_params_internal()
        self.model_R_outputs = self.rnet.outputs

        ### ENET
        if self.soft_learning:
            self.model_Q_predict_action_from_next_obs = tf.stop_gradient(
                tf.one_hot(
                    tf.multinomial(
                        self.qnet.make_network(inputs=self.next_obs), 1)[:, 0],
                    self.qnet.output_size))
        else:
            self.model_Q_predict_action_from_next_obs = tf.stop_gradient(
                tf.one_hot(
                    tf.argmax(self.qnet.make_network(inputs=self.next_obs),
                              axis=1), self.qnet.output_size))

        # Duplicate the Fnet with different variables for the target network
        self.tfnet = Qnet([
            self.next_obs, self.model_Q_predict_action_from_next_obs,
            self.scaled_next_tds
        ],
                          self.n_inputs,
                          params['network_spec'],
                          scope='tfnet')
        self.target_F_outputs = self.tfnet.outputs
        self.target_F_params = self.tfnet.get_params_internal()
        self.target_F_from_obs = self.tfnet.make_network(
            inputs=[self.obs, self.actions, self.scaled_tds])

        # Duplicate the Rnet with different variables for the target network
        self.trnet = Qnet([
            self.next_obs, self.model_Q_predict_action_from_next_obs,
            self.scaled_next_tds
        ],
                          1,
                          params['network_spec'],
                          scope='trnet')
        self.target_R_outputs = self.trnet.outputs
        self.target_R_params = self.trnet.get_params_internal()
        self.target_R_from_obs = self.trnet.make_network(
            inputs=[self.obs, self.actions, self.scaled_tds])

        # Duplicate the Qnet with different variables for the target network
        self.tqnet = Qnet(tf.add(self.next_obs,
                                 self.td_is_not_zero * self.target_F_outputs),
                          self.n_outputs,
                          params['network_spec'],
                          scope='tqnet')
        self.target_Q_outputs = self.tqnet.outputs
        self.target_Q_params = self.tqnet.get_params_internal()

        if self.soft_learning:

            # For soft learning:
            # V = sum_a p(s,a) * (q(s,a) - log p(s,a))
            #   = sum_a exp(q)/Z * (q - log(exp(q)/Z))
            #   = sum_a p * log(Z)
            #   = log(Z)
            self.partition_function = tf.reduce_mean(
                self.target_Q_outputs, axis=1) + tf.log(
                    tf.reduce_sum(
                        tf.exp(self.target_Q_outputs - tf.reduce_mean(
                            self.target_Q_outputs, axis=1, keepdims=True)),
                        axis=1))
            self.target_V = self.partition_function
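            # Note: the log-sum-exp above is shifted by the per-row mean purely for
            # numerical stability; log sum_a exp(Q_a) = c + log sum_a exp(Q_a - c)
            # holds for any constant c, so the mean works as well as the usual max.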

            params['policy_params']['action_choice'] = params[
                'policy_params'].get('action_choice', 'Boltzmann')
            assert params['policy_params'][
                'action_choice'] == 'Boltzmann'  # Softmax on outputs

        self.policy = Policy_Discrete_for_Qnet(self.qnet,
                                               **params['policy_params'])

        self.rb = Replay_Buffer(self.n_inputs,
                                self.n_outputs,
                                discrete_action=True,
                                multi_step=self.multi_step,
                                **params['replay_buffer_params'])

        self.optimizer = tf.train.AdamOptimizer(learning_rate=self.lr)

        self.train_ops = []
        self._init_training_ops()

        self.target_Q_update = uf.update_target_network(
            self.model_Q_params,
            self.target_Q_params,
            tau=self.tau,
            update_op_control_dependencies=self.q_train_op)
        self.target_R_update = uf.update_target_network(
            self.model_R_params,
            self.target_R_params,
            tau=self.tau,
            update_op_control_dependencies=self.train_ops)
        self.target_F_update = uf.update_target_network(
            self.model_F_params,
            self.target_F_params,
            tau=self.tau,
            update_op_control_dependencies=self.train_ops)
        self.train_ops.append([self.target_R_update, self.target_F_update])

        self.q_train_ops = tf.group(self.q_train_op, self.target_Q_update)
        self.train_ops = tf.group(self.train_ops, self.q_train_ops)

        self.loss_ops = [self.R_Loss, self.F_Loss, self.Q_Loss]

        self._finish_agent_setup()
Example #42
hidden2 = tf.layers.dense(hidden1,
                          n_hidden,
                          activation=tf.nn.relu,
                          kernel_initializer=intializer)

output_layer = tf.layers.dense(hidden2,
                               out,
                               activation=tf.nn.sigmoid,
                               kernel_initializer=intializer)

# Probability of going left (output_layer) and right (1 - output_layer)
# Concatenate along axis 1 into a [batch, 2] probability tensor
probb = tf.concat(values=[output_layer, 1 - output_layer], axis=1)

# Final action: "0" or "1". tf.multinomial expects (unnormalized) log-probabilities,
# so take the log of the probabilities before sampling.
action = tf.multinomial(tf.log(probb), num_samples=1)

init = tf.global_variables_initializer()

# Run 50 episodes; cap each episode at 500 time steps (n_steps).
n_steps = 500
episodes = 50
avg_steps = []
env = gym.make('CartPole-v1')

with tf.Session() as sess:
    sess.run(init)

    for i in range(episodes):
        # Reset Environment
        obs = env.reset()
Example #43
    def build_policy_network_op(self, scope="policy_network"):
        """
    Build the policy network, construct the tensorflow operation to sample 
    actions from the policy network outputs, and compute the log probabilities
    of the taken actions (for computing the loss later). These operations are 
    stored in self.sampled_action and self.logprob. Must handle both settings
    of self.discrete.

    TODO:
    Discrete case:
        logits: the logits for each action
            HINT: use build_mlp
        self.sampled_action: sample from these logits
            HINT: use tf.multinomial + tf.squeeze
        self.logprob: compute the log probabilities of the taken actions
            HINT: 1. tf.nn.sparse_softmax_cross_entropy_with_logits computes 
                     the *negative* log probabilities of labels, given logits.
                  2. taken actions are different than sampled actions!

    Continuous case:
        To build a policy in a continuous action space domain, we will have the
        model output the means of each action dimension, and then sample from
        a multivariate normal distribution with these means and trainable standard
        deviation.

        That is, the action a_t ~ N( mu(o_t), sigma)
        where mu(o_t) is the network that outputs the means for each action 
        dimension, and sigma is a trainable variable for the standard deviations.
        N here is a multivariate gaussian distribution with the given parameters.

        action_means: the predicted means for each action dimension.
            HINT: use build_mlp
        log_std: a trainable variable for the log standard deviations.
        --> think about why we use log std as the trainable variable instead of std
        self.sampled_actions: sample from the gaussian distribution as described above
            HINT: use tf.random_normal
        self.logprob: the log probabilities of the taken actions
            HINT: use tf.contrib.distributions.MultivariateNormalDiag

    """
        #######################################################
        #########   YOUR CODE HERE - 5-10 lines.   ############

        if self.discrete:
            action_logits = build_mlp(
                self.observation_placeholder,
                self.action_dim,
                "discrete",
            )  # TODO
            self.sampled_action = tf.reshape(tf.multinomial(action_logits, 1),
                                             [-1])  # TODO
            self.logprob = -tf.nn.sparse_softmax_cross_entropy_with_logits(
                labels=self.action_placeholder, logits=action_logits)  # TODO
        else:
            action_means = build_mlp(
                self.observation_placeholder,
                self.action_dim,
                "continuous",
            )  # TODO
            log_std = tf.get_variable("std", [self.action_dim],
                                      dtype=tf.float32)  # TODO
            self.sampled_action = tf.random_normal(
                shape=tf.shape(action_means),
                mean=action_means,
                stddev=tf.exp(log_std))  # TODO
            self.logprob = tf.contrib.distributions.MultivariateNormalDiag(
                loc=action_means, scale_diag=tf.exp(log_std)).log_prob(
                    self.action_placeholder)  # TODO
Example #44
def ssd_random_sample_patch(image,
                            labels,
                            bboxes,
                            ratio_list=[0.1, 0.3, 0.5, 0.7, 0.9, 1.],
                            name=None):
    '''ssd_random_sample_patch.
    Select one min_iou from ratio_list.
    Sample a patch width and height from [0, width] and [0, height].
    Check that the patch aspect ratio is between 0.5 and 2.
    Select the left-top point from (width - patch_width, height - patch_height).
    Check that the patch has an IoU of at least min_iou with the remaining ground-truth bboxes.
    Keep the ground-truth boxes whose centers fall inside the sampled patch; if none, try again.
    '''
    def sample_width_height(width, height):
        with tf.name_scope('sample_width_height'):
            index = 0
            max_attempt = 10
            sampled_width, sampled_height = width, height

            def condition(index, sampled_width, sampled_height, width, height):
                return tf.logical_or(
                    tf.logical_and(
                        tf.logical_or(
                            tf.greater(sampled_width, sampled_height * 2),
                            tf.greater(sampled_height, sampled_width * 2)),
                        tf.less(index, max_attempt)), tf.less(index, 1))

            def body(index, sampled_width, sampled_height, width, height):
                sampled_width = tf.random_uniform(
                    [1], minval=0.3, maxval=0.999, dtype=tf.float32)[0] * width
                sampled_height = tf.random_uniform(
                    [1], minval=0.3, maxval=0.999,
                    dtype=tf.float32)[0] * height

                return index + 1, sampled_width, sampled_height, width, height

            [index, sampled_width, sampled_height, _, _] = tf.while_loop(
                condition,
                body, [index, sampled_width, sampled_height, width, height],
                parallel_iterations=4,
                back_prop=False,
                swap_memory=True)

            return tf.cast(sampled_width,
                           tf.int32), tf.cast(sampled_height, tf.int32)

    def jaccard_with_anchors(roi, bboxes):
        with tf.name_scope('jaccard_with_anchors'):
            int_ymin = tf.maximum(roi[0], bboxes[:, 0])
            int_xmin = tf.maximum(roi[1], bboxes[:, 1])
            int_ymax = tf.minimum(roi[2], bboxes[:, 2])
            int_xmax = tf.minimum(roi[3], bboxes[:, 3])
            h = tf.maximum(int_ymax - int_ymin + 1., 0.)
            w = tf.maximum(int_xmax - int_xmin + 1., 0.)
            inter_vol = h * w
            union_vol = (roi[3] - roi[1] + 1.) * (roi[2] - roi[0] + 1.) + (
                (bboxes[:, 2] - bboxes[:, 0] + 1.) *
                (bboxes[:, 3] - bboxes[:, 1] + 1.) - inter_vol)
            jaccard = tf.div(inter_vol, union_vol)
            return jaccard

    def areas(bboxes):
        with tf.name_scope('bboxes_areas'):
            vol = (bboxes[:, 3] - bboxes[:, 1] + 1.) * (bboxes[:, 2] -
                                                        bboxes[:, 0] + 1.)
            return vol

    def check_roi_center(width, height, labels, bboxes):
        with tf.name_scope('check_roi_center'):
            index = 0
            max_attempt = 20
            float_width = tf.to_float(width)
            float_height = tf.to_float(height)
            roi = [0., 0., float_height - 1., float_width - 1.]

            mask = tf.cast(tf.zeros_like(labels, dtype=tf.uint8), tf.bool)
            center_x, center_y = (bboxes[:, 1] + bboxes[:, 3]) / 2, (
                bboxes[:, 0] + bboxes[:, 2]) / 2

            def condition(index, roi, mask):
                return tf.logical_or(
                    tf.logical_and(
                        tf.reduce_sum(tf.to_int32(mask)) < 1,
                        tf.less(index, max_attempt)), tf.less(index, 1))

            def body(index, roi, mask):
                sampled_width, sampled_height = sample_width_height(
                    float_width, float_height)

                x = tf.random_uniform([],
                                      minval=0,
                                      maxval=width - sampled_width,
                                      dtype=tf.int32)
                y = tf.random_uniform([],
                                      minval=0,
                                      maxval=height - sampled_height,
                                      dtype=tf.int32)

                roi = [
                    tf.to_float(y),
                    tf.to_float(x),
                    tf.to_float(y + sampled_height),
                    tf.to_float(x + sampled_width)
                ]

                mask_min = tf.logical_and(tf.greater(center_y, roi[0]),
                                          tf.greater(center_x, roi[1]))
                mask_max = tf.logical_and(tf.less(center_y, roi[2]),
                                          tf.less(center_x, roi[3]))
                mask = tf.logical_and(mask_min, mask_max)

                return index + 1, roi, mask

            [index, roi, mask] = tf.while_loop(condition,
                                               body, [index, roi, mask],
                                               parallel_iterations=10,
                                               back_prop=False,
                                               swap_memory=True)

            mask_labels = tf.boolean_mask(labels, mask)
            mask_bboxes = tf.boolean_mask(bboxes, mask)

            return roi, mask_labels, mask_bboxes

    def check_roi_overlap(width, height, labels, bboxes, min_iou):
        with tf.name_scope('check_roi_overlap'):
            index = 0
            max_attempt = 50
            float_width = tf.to_float(width)
            float_height = tf.to_float(height)
            roi = [0., 0., float_height - 1., float_width - 1.]

            mask_labels = labels
            mask_bboxes = bboxes

            def condition(index, roi, mask_labels, mask_bboxes):
                return tf.logical_or(
                    tf.logical_or(
                        tf.logical_and(
                            tf.reduce_sum(
                                tf.to_int32(
                                    jaccard_with_anchors(
                                        roi, mask_bboxes) < min_iou)) > 0,
                            tf.less(index, max_attempt)), tf.less(index, 1)),
                    tf.less(tf.shape(mask_labels)[0], 1))

            def body(index, roi, mask_labels, mask_bboxes):
                roi, mask_labels, mask_bboxes = check_roi_center(
                    width, height, labels, bboxes)
                return index + 1, roi, mask_labels, mask_bboxes

            [index, roi, mask_labels, mask_bboxes
             ] = tf.while_loop(condition,
                               body, [index, roi, mask_labels, mask_bboxes],
                               parallel_iterations=16,
                               back_prop=False,
                               swap_memory=True)

            return tf.cond(
                tf.greater(tf.shape(mask_labels)[0], 0), lambda: (tf.to_int32([
                    roi[0], roi[1], roi[2] - roi[0] + 1., roi[3] - roi[1] + 1.
                ]), mask_labels, mask_bboxes), lambda: (tf.to_int32(
                    [0., 0., float_height, float_width]), labels, bboxes))

    def sample_patch(image, labels, bboxes, min_iou):
        with tf.name_scope('sample_patch'):
            height, width, depth = _ImageDimensions(image, rank=3)

            roi_slice_range, mask_labels, mask_bboxes = check_roi_overlap(
                width, height, labels, bboxes, min_iou)

            # Add offset.
            offset = tf.cast(
                tf.stack([
                    roi_slice_range[0], roi_slice_range[1], roi_slice_range[0],
                    roi_slice_range[1]
                ]), mask_bboxes.dtype)
            mask_bboxes = mask_bboxes - offset

            cliped_ymin = tf.maximum(0., mask_bboxes[:, 0])
            cliped_xmin = tf.maximum(0., mask_bboxes[:, 1])
            cliped_ymax = tf.minimum(
                tf.to_float(roi_slice_range[2]) - 1., mask_bboxes[:, 2])
            cliped_xmax = tf.minimum(
                tf.to_float(roi_slice_range[3]) - 1., mask_bboxes[:, 3])

            mask_bboxes = tf.stack(
                [cliped_ymin, cliped_xmin, cliped_ymax, cliped_xmax], axis=-1)

            return tf.cond(
                tf.logical_or(tf.less(roi_slice_range[2], 1),
                              tf.less(roi_slice_range[3], 1)), lambda:
                (image, labels, bboxes), lambda:
                (tf.slice(image, [roi_slice_range[0], roi_slice_range[1], 0], [
                    roi_slice_range[2], roi_slice_range[3], -1
                ]), mask_labels, mask_bboxes))

    with tf.name_scope('ssd_random_sample_patch'):
        image = tf.convert_to_tensor(image, name='image')

        min_iou_list = tf.convert_to_tensor(ratio_list)
        samples_min_iou = tf.multinomial(
            tf.log([[1. / len(ratio_list)] * len(ratio_list)]), 1)

        sampled_min_iou = min_iou_list[tf.cast(samples_min_iou[0][0],
                                               tf.int32)]

        return tf.cond(
            tf.less(sampled_min_iou, 1.),
            lambda: sample_patch(image, labels, bboxes, sampled_min_iou),
            lambda: (image, labels, bboxes))
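
A minimal usage sketch (hypothetical tensors; assumes this module's helpers such as _ImageDimensions are importable, and bboxes given as [ymin, xmin, ymax, xmax] in pixel coordinates, matching the arithmetic above):

image = tf.random_uniform([300, 300, 3], maxval=255., dtype=tf.float32)
labels = tf.constant([1, 2], dtype=tf.int64)
bboxes = tf.constant([[20., 20., 120., 140.],
                      [60., 80., 220., 240.]], dtype=tf.float32)
patch, patch_labels, patch_bboxes = ssd_random_sample_patch(image, labels, bboxes)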
Example #45
def train_PG(exp_name='',
             env_name='CartPole-v0',
             n_iter=100, 
             gamma=1.0, 
             min_timesteps_per_batch=1000, 
             max_path_length=None,
             learning_rate=5e-3, 
             reward_to_go=True, 
             animate=True, 
             logdir=None, 
             normalize_advantages=True,
             nn_baseline=False, 
             seed=0,
             # network arguments
             n_layers=1,
             size=32
             ):

    start = time.time()

    # Configure output directory for logging
    logz.configure_output_dir(logdir)

    # Log experimental parameters
    args = inspect.getargspec(train_PG)[0]
    locals_ = locals()
    params = {k: locals_[k] if k in locals_ else None for k in args}
    logz.save_params(params)

    # Set random seeds
    tf.set_random_seed(seed)
    np.random.seed(seed)

    # Make the gym environment
    env = gym.make(env_name)
    
    # Is this env continuous, or discrete?
    discrete = isinstance(env.action_space, gym.spaces.Discrete)

    # Maximum length for episodes
    max_path_length = max_path_length or env.spec.max_episode_steps

    #========================================================================================#
    # Notes on notation:
    # 
    # Symbolic variables have the prefix sy_, to distinguish them from the numerical values
    # that are computed later in the function
    # 
    # Prefixes and suffixes:
    # ob - observation 
    # ac - action
    # _no - this tensor should have shape (batch size /n/, observation dim)
    # _na - this tensor should have shape (batch size /n/, action dim)
    # _n  - this tensor should have shape (batch size /n/)
    # 
    # Note: batch size /n/ is defined at runtime, and until then, the shape for that axis
    # is None
    #========================================================================================#

    # Observation and action sizes
    ob_dim = env.observation_space.shape[0]
    ac_dim = env.action_space.n if discrete else env.action_space.shape[0]

    #========================================================================================#
    #                           ----------SECTION 4----------
    # Placeholders
    # 
    # Need these for batch observations / actions / advantages in policy gradient loss function.
    #========================================================================================#

    sy_ob_no = tf.placeholder(shape=[None, ob_dim], name="ob", dtype=tf.float32)
    if discrete:
        sy_ac_na = tf.placeholder(shape=[None], name="ac", dtype=tf.int32) 
    else:
        sy_ac_na = tf.placeholder(shape=[None, ac_dim], name="ac", dtype=tf.float32) 

    # Define a placeholder for advantages
    sy_adv_n = tf.placeholder(shape=[None], name='adv', dtype=tf.float32)


    #========================================================================================#
    #                           ----------SECTION 4----------
    # Networks
    # 
    # Make symbolic operations for
    #   1. Policy network outputs which describe the policy distribution.
    #       a. For the discrete case, just logits for each action.
    #
    #       b. For the continuous case, the mean / log std of a Gaussian distribution over 
    #          actions.
    #
    #      Hint: use the 'build_mlp' function you defined in utilities.
    #
    #      Note: these ops should be functions of the placeholder 'sy_ob_no'
    #
    #   2. Producing samples stochastically from the policy distribution.
    #       a. For the discrete case, an op that takes in logits and produces actions.
    #
    #          Should have shape [None]
    #
    #       b. For the continuous case, use the reparameterization trick:
    #          The output from a Gaussian distribution with mean 'mu' and std 'sigma' is
    #
    #               mu + sigma * z,         z ~ N(0, I)
    #
    #          This reduces the problem to just sampling z. (Hint: use tf.random_normal!)
    #
    #          Should have shape [None, ac_dim]
    #
    #      Note: these ops should be functions of the policy network output ops.
    #
    #   3. Computing the log probability of a set of actions that were actually taken, 
    #      according to the policy.
    #
    #      Note: these ops should be functions of the placeholder 'sy_ac_na', and the 
    #      policy network output ops.
    #   
    #========================================================================================#

    if discrete:
        # YOUR_CODE_HERE
        sy_logits_na = build_mlp(sy_ob_no, ac_dim, 'policy', n_layers=n_layers, size=size)
        sy_sampled_ac = tf.reshape(tf.multinomial(sy_logits_na, 1), [-1])
        # sparse cross-entropy returns the *negative* log probability of the taken action
        sy_logprob_n = -tf.nn.sparse_softmax_cross_entropy_with_logits(labels=sy_ac_na, logits=sy_logits_na)

    else:
        # YOUR_CODE_HERE
        sy_mean = TODO
        sy_logstd = TODO # logstd should just be a trainable variable, not a network output.
        sy_sampled_ac = TODO
        sy_logprob_n = TODO  # Hint: Use the log probability under a multivariate gaussian. 
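        # A hedged sketch of one way to fill these TODOs (not the official solution),
        # following the reparameterization trick described above, a_t = mu(o_t) + sigma * z:
        #
        #   sy_mean = build_mlp(sy_ob_no, ac_dim, 'policy', n_layers=n_layers, size=size)
        #   sy_logstd = tf.get_variable('logstd', shape=[ac_dim], dtype=tf.float32)
        #   sy_sampled_ac = sy_mean + tf.exp(sy_logstd) * tf.random_normal(tf.shape(sy_mean))
        #   sy_logprob_n = tf.contrib.distributions.MultivariateNormalDiag(
        #       loc=sy_mean, scale_diag=tf.exp(sy_logstd)).log_prob(sy_ac_na)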



    #========================================================================================#
    #                           ----------SECTION 4----------
    # Loss Function and Training Operation
    #========================================================================================#

    loss = -tf.reduce_mean(sy_logprob_n * sy_adv_n)  # negated so that minimizing performs gradient ascent on the PG objective
    update_op = tf.train.AdamOptimizer(learning_rate).minimize(loss)


    #========================================================================================#
    #                           ----------SECTION 5----------
    # Optional Baseline
    #========================================================================================#

    if nn_baseline:
        baseline_prediction = tf.squeeze(build_mlp(
                                sy_ob_no, 
                                1, 
                                "nn_baseline",
                                n_layers=n_layers,
                                size=size))
        # Define placeholders for targets, a loss function and an update op for fitting a 
        # neural network baseline. These will be used to fit the neural network baseline. 
        # YOUR_CODE_HERE
        # This is some actor-critic stuff, not prepared for this
        sy_b_n = tf.placeholder(shape=[None], name='b', dtype=tf.float32)
        b_loss = tf.reduce_mean(tf.losses.mean_squared_error(labels=sy_b_n, predictions=baseline_prediction))
        _update_op = tf.train.AdamOptimizer(learning_rate).minimize(b_loss)


    #========================================================================================#
    # Tensorflow Engineering: Config, Session, Variable initialization
    #========================================================================================#

    tf_config = tf.ConfigProto(inter_op_parallelism_threads=1, intra_op_parallelism_threads=1) 

    sess = tf.Session(config=tf_config)
    sess.__enter__() # equivalent to `with sess:`
    tf.global_variables_initializer().run() #pylint: disable=E1101



    #========================================================================================#
    # Training Loop
    #========================================================================================#

    total_timesteps = 0

    for itr in range(n_iter):
        print("********** Iteration %i ************"%itr)

        # Collect paths until we have enough timesteps
        timesteps_this_batch = 0
        paths = []
        while True:
            ob = env.reset()
            obs, acs, rewards = [], [], []
            animate_this_episode=(len(paths)==0 and (itr % 10 == 0) and animate)
            steps = 0
            while True:
                if animate_this_episode:
                    env.render()
                    time.sleep(0.05)
                obs.append(ob)
                ac = sess.run(sy_sampled_ac, feed_dict={sy_ob_no : ob[None]})
                ac = ac[0]
                acs.append(ac)
                ob, rew, done, _ = env.step(ac)
                rewards.append(rew)
                steps += 1
                if done or steps > max_path_length:
                    break
            path = {"observation" : np.array(obs), 
                    "reward" : np.array(rewards), 
                    "action" : np.array(acs)}
            paths.append(path)
            timesteps_this_batch += pathlength(path)
            if timesteps_this_batch > min_timesteps_per_batch:
                break
        total_timesteps += timesteps_this_batch

        # Build arrays for observation, action for the policy gradient update by concatenating 
        # across paths
        ob_no = np.concatenate([path["observation"] for path in paths])
        ac_na = np.concatenate([path["action"] for path in paths])

        #====================================================================================#
        #                           ----------SECTION 4----------
        # Computing Q-values
        #
        # Your code should construct numpy arrays for Q-values which will be used to compute
        # advantages (which will in turn be fed to the placeholder you defined above). 
        #
        # Recall that the expression for the policy gradient PG is
        #
        #       PG = E_{tau} [sum_{t=0}^T grad log pi(a_t|s_t) * (Q_t - b_t )]
        #
        # where 
        #
        #       tau=(s_0, a_0, ...) is a trajectory,
        #       Q_t is the Q-value at time t, Q^{pi}(s_t, a_t),
        #       and b_t is a baseline which may depend on s_t. 
        #
        # You will write code for two cases, controlled by the flag 'reward_to_go':
        #
        #   Case 1: trajectory-based PG 
        #
        #       (reward_to_go = False)
        #
        #       Instead of Q^{pi}(s_t, a_t), we use the total discounted reward summed over 
        #       entire trajectory (regardless of which time step the Q-value should be for). 
        #
        #       For this case, the policy gradient estimator is
        #
        #           E_{tau} [sum_{t=0}^T grad log pi(a_t|s_t) * Ret(tau)]
        #
        #       where
        #
        #           Ret(tau) = sum_{t'=0}^T gamma^t' r_{t'}.
        #
        #       Thus, you should compute
        #
        #           Q_t = Ret(tau)
        #
        #   Case 2: reward-to-go PG 
        #
        #       (reward_to_go = True)
        #
        #       Here, you estimate Q^{pi}(s_t, a_t) by the discounted sum of rewards starting
        #       from time step t. Thus, you should compute
        #
        #           Q_t = sum_{t'=t}^T gamma^(t'-t) * r_{t'}
        #
        #
        # Store the Q-values for all timesteps and all trajectories in a variable 'q_n',
        # like the 'ob_no' and 'ac_na' above. 
        #
        #====================================================================================#

        # YOUR_CODE_HERE
        if not reward_to_go:
            # total discounted return of the trajectory, repeated at every timestep
            q_n = np.concatenate([
                np.sum(path['reward'] * gamma ** np.arange(len(path['reward']))) * np.ones_like(path['reward'])
                for path in paths])
        else:
            # discounted reward-to-go: Q_t = sum_{t'>=t} gamma^(t'-t) * r_{t'}
            q_n = np.concatenate([
                np.array([np.sum(path['reward'][t:] * gamma ** np.arange(len(path['reward']) - t))
                          for t in range(len(path['reward']))])
                for path in paths])
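        # Worked example: for rewards [1, 1, 1] and gamma = 0.9, reward-to-go gives
        # Q = [1 + 0.9 + 0.81, 1 + 0.9, 1] = [2.71, 1.9, 1.0], while the trajectory-based
        # estimator repeats the total return 2.71 at every timestep.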

        #====================================================================================#
        #                           ----------SECTION 5----------
        # Computing Baselines
        #====================================================================================#

        if nn_baseline:
            # If nn_baseline is True, use your neural network to predict reward-to-go
            # at each timestep for each trajectory, and save the result in a variable 'b_n'
            # like 'ob_no', 'ac_na', and 'q_n'.
            #
            # Hint #bl1: rescale the output from the nn_baseline to match the statistics
            # (mean and std) of the current or previous batch of Q-values. (Goes with Hint
            # #bl2 below.)

            b_n = sess.run(baseline_prediction, feed_dict={sy_ob_no: ob_no})
            adv_n = q_n - b_n
        else:
            adv_n = q_n.copy()

        #====================================================================================#
        #                           ----------SECTION 4----------
        # Advantage Normalization
        #====================================================================================#

        if normalize_advantages:
            # On the next line, implement a trick which is known empirically to reduce variance
            # in policy gradient methods: normalize adv_n to have mean zero and std=1. 
            # YOUR_CODE_HERE
            pass


        #====================================================================================#
        #                           ----------SECTION 5----------
        # Optimizing Neural Network Baseline
        #====================================================================================#
        if nn_baseline:
            # ----------SECTION 5----------
            # If a neural network baseline is used, set up the targets and the inputs for the 
            # baseline. 
            # 
            # Fit it to the current batch in order to use for the next iteration. Use the 
            # baseline_update_op you defined earlier.
            #
            # Hint #bl2: Instead of trying to target raw Q-values directly, rescale the 
            # targets to have mean zero and std=1. (Goes with Hint #bl1 above.)

            # YOUR_CODE_HERE
            pass

        #====================================================================================#
        #                           ----------SECTION 4----------
        # Performing the Policy Update
        #====================================================================================#

        # Call the update operation necessary to perform the policy gradient update based on 
        # the current batch of rollouts.
        # 
        # For debug purposes, you may wish to save the value of the loss function before
        # and after an update, and then log them below. 

        # YOUR_CODE_HERE


        # Log diagnostics
        returns = [path["reward"].sum() for path in paths]
        ep_lengths = [pathlength(path) for path in paths]
        logz.log_tabular("Time", time.time() - start)
        logz.log_tabular("Iteration", itr)
        logz.log_tabular("AverageReturn", np.mean(returns))
        logz.log_tabular("StdReturn", np.std(returns))
        logz.log_tabular("MaxReturn", np.max(returns))
        logz.log_tabular("MinReturn", np.min(returns))
        logz.log_tabular("EpLenMean", np.mean(ep_lengths))
        logz.log_tabular("EpLenStd", np.std(ep_lengths))
        logz.log_tabular("TimestepsThisBatch", timesteps_this_batch)
        logz.log_tabular("TimestepsSoFar", total_timesteps)
        logz.dump_tabular()
        logz.pickle_tf_vars()
Example #46
def generate_corruptions_for_fit(X,
                                 entities_list=None,
                                 eta=1,
                                 corrupt_side='s+o',
                                 entities_size=0,
                                 rnd=None):
    """Generate corruptions for training.

        Creates corrupted triples for each statement in an array of statements,
        as described by :cite:`trouillon2016complex`.

        .. note::
            Collisions are not checked, as this will be computationally expensive :cite:`trouillon2016complex`.
            That means that some corruptions *may* turn out to be positive statements (i.e. the *unfiltered* setting).

        .. note::
            When processing large knowledge graphs, it may be useful to generate corruptions only using entities from
            a single batch.
            This also brings the benefit of creating more meaningful negatives, as entities used to corrupt are
            sourced locally.
            The function can be configured to generate corruptions *only* using the entities from the current batch.
            You can enable such behaviour by setting ``entities_size==-1``. In such case, if ``entities_list=None``
            all entities from the *current batch* will be used to generate corruptions.

    Parameters
    ----------
    X : Tensor, shape [n, 3]
        An array of positive triples that will be used to create corruptions.
    entities_list : list
        List of entities to be used for generating corruptions. (default:None).
        if ``entities_list=None``, all entities will be used to generate corruptions (default behaviour).
    eta : int
        The number of corruptions per triple that must be generated.
    corrupt_side: string
        Specifies which side of the triple to corrupt:

        - 's': corrupt only subject.
        - 'o': corrupt only object
        - 's+o': corrupt both subject and object
    entities_size: int
        Size of entities to be used while generating corruptions. It assumes entity ids start from 0 and are
        contiguous. (default: 0).
        When processing large knowledge graphs, it may be useful to generate corruptions only using entities from
        a single batch.
        This also brings the benefit of creating more meaningful negatives, as entities used to corrupt are
        sourced locally.
        The function can be configured to generate corruptions *only* using the entities from the current batch.
        You can enable such behaviour by setting ``entities_size==-1``. In such case, if ``entities_list=None``
        all entities from the *current batch* will be used to generate corruptions.
    rnd: numpy.random.RandomState
        A random number generator.

    Returns
    -------

    out : Tensor, shape [n * eta, 3]
        An array of corruptions for a list of positive triples x. For each row in X the corresponding corruption
        indexes can be found at [index+i*n for i in range(eta)]

    """
    logger.debug('Generating corruptions for fit.')
    if corrupt_side not in ['s+o', 's', 'o']:
        msg = 'Invalid argument value {} for corruption side passed for evaluation.'.format(
            corrupt_side)
        logger.error(msg)
        raise ValueError(msg)

    dataset = tf.reshape(tf.tile(tf.reshape(X, [-1]), [eta]),
                         [tf.shape(X)[0] * eta, 3])

    if corrupt_side == 's+o':
        keep_subj_mask = tf.tile(
            tf.cast(
                tf.random_uniform([tf.shape(X)[0]],
                                  0,
                                  2,
                                  dtype=tf.int32,
                                  seed=rnd), tf.bool), [eta])
    else:
        keep_subj_mask = tf.cast(tf.ones(tf.shape(X)[0] * eta, tf.int32),
                                 tf.bool)
        if corrupt_side == 's':
            keep_subj_mask = tf.logical_not(keep_subj_mask)

    keep_obj_mask = tf.logical_not(keep_subj_mask)
    keep_subj_mask = tf.cast(keep_subj_mask, tf.int32)
    keep_obj_mask = tf.cast(keep_obj_mask, tf.int32)

    logger.debug('Created corruption masks.')

    if entities_size != 0:
        replacements = tf.random_uniform([tf.shape(dataset)[0]],
                                         0,
                                         entities_size,
                                         dtype=tf.int32,
                                         seed=rnd)
    else:
        if entities_list is None:
            # use entities in the batch
            entities_list, _ = tf.unique(
                tf.squeeze(
                    tf.concat([
                        tf.slice(X, [0, 0], [tf.shape(X)[0], 1]),
                        tf.slice(X, [0, 2], [tf.shape(X)[0], 1])
                    ], 0)))

        random_indices = tf.squeeze(
            tf.multinomial(tf.expand_dims(tf.zeros(tf.shape(entities_list)[0]),
                                          0),
                           num_samples=tf.shape(dataset)[0],
                           seed=rnd))

        replacements = tf.gather(entities_list, random_indices)

    subjects = tf.math.add(tf.math.multiply(keep_subj_mask, dataset[:, 0]),
                           tf.math.multiply(keep_obj_mask, replacements))
    logger.debug('Created corrupted subjects.')
    relationships = dataset[:, 1]
    logger.debug('Retained relationships.')
    objects = tf.math.add(tf.math.multiply(keep_obj_mask, dataset[:, 2]),
                          tf.math.multiply(keep_subj_mask, replacements))
    logger.debug('Created corrupted objects.')

    out = tf.transpose(tf.stack([subjects, relationships, objects]))

    logger.debug('Returning corruptions for fit.')
    return out
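
A minimal usage sketch (hypothetical values; note that ``rnd`` is forwarded to the ``seed`` argument of the TensorFlow random ops above, so an integer seed is used here):

X = tf.constant([[0, 0, 1],
                 [1, 1, 2]], dtype=tf.int32)
# two corruptions per triple, drawing replacement entities from ids 0..9
corruptions = generate_corruptions_for_fit(X, eta=2, entities_size=10, rnd=0)  # shape [4, 3]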
Example #47
    def train_mode(self,
                   vocab,
                   encoder_dim,
                   encoder_states,
                   encoder_features,
                   passage_word_idx,
                   passage_mask,
                   init_state,
                   decoder_inputs,
                   answer_batch,
                   loss_weights,
                   mode_gen='ce_train'):
        '''
        encoder_dim: int-valued
        encoder_states: [batch_size, passage_len, encoder_dim].
        passage_word_idx: [batch_size, passage_len] int32
        passage_mask: [batch_size, passage_len] 0/1
        init_state: Tuple of [batch_size, gen_hidden_size]
        decoder_inputs: [batch_size, max_dec_steps].
        answer_batch: [batch_size, max_dec_steps]
        '''
        options = self.options

        input_shape = tf.shape(encoder_states)
        batch_size = input_shape[0]
        passage_len = input_shape[1]

        # map decoder inputs to word embeddings
        decoder_inputs = tf.unstack(decoder_inputs,
                                    axis=1)  # max_enc_steps * [batch_size]
        answer_batch_unstack = tf.unstack(answer_batch, axis=1)

        # initialize all the variables
        state_t_1 = init_state
        context_t_1 = tf.zeros([batch_size, encoder_dim])
        coverage_t_1 = None

        # store variables from each time-step
        coverages = []
        attn_dists = []
        p_gens = []
        vocab_scores = []
        sampled_words = []
        self.encoder_features = encoder_features
        with variable_scope.variable_scope("attention_decoder"):
            # Get the weight vectors v and W_c (W_c is for coverage)
            v = variable_scope.get_variable("v", [options.attention_vec_size])
            v = tf.expand_dims(tf.expand_dims(v, axis=0), axis=0)
            w_c = None
            if options.use_coverage:
                with variable_scope.variable_scope("coverage"):
                    w_c = variable_scope.get_variable(
                        "w_c", [options.attention_vec_size])
                    w_c = tf.expand_dims(tf.expand_dims(w_c, axis=0), axis=0)

            # For each step, dec_input => lstm_output => vocab_score
            wordidx_t = decoder_inputs[0]  # [batch_size] int32
            for i in range(options.max_answer_len):
                if mode_gen in (
                        'ce_train',
                        'loss',
                ):
                    wordidx_t = decoder_inputs[
                        i]  # the wordidx_t must from decoder_inputs for phrase model
                word_t = self.embedding_lookup(wordidx_t)
                if i > 0:
                    variable_scope.get_variable_scope().reuse_variables()

                (state_t, context_t, coverage_t, attn_dist_t,
                 p_gen_t, output_t) = self.one_step_decoder(
                     state_t_1, context_t_1, coverage_t_1, word_t,
                     encoder_states, self.encoder_features, passage_word_idx,
                     passage_mask, v, w_c, vocab)
                coverages.append(coverage_t)
                attn_dists.append(attn_dist_t)
                p_gens.append(p_gen_t)
                vocab_scores.append(output_t)  # The vocabulary distributions.

                state_t_1 = state_t
                context_t_1 = context_t
                coverage_t_1 = coverage_t

                if mode_gen == 'greedy':
                    wordidx_t = tf.argmax(output_t, 1)  # [batch_size]
                    wordidx_t = tf.reshape(wordidx_t, [-1])  # [batch_size]
                elif mode_gen == 'sample':
                    log_score_t = tf.log(output_t)  # [batch_size, vsize]
                    wordidx_t = tf.multinomial(log_score_t,
                                               1)  # [batch_size, 1]
                    wordidx_t = tf.reshape(wordidx_t, [-1])  # [batch_size]
                elif mode_gen in (
                        'ce_train',
                        'loss',
                ):
                    wordidx_t = answer_batch_unstack[i]
                else:
                    assert False, 'unknown generating mode %s' % mode_gen
                sampled_words.append(wordidx_t)

        if len(sampled_words) != 0:
            sampled_words = tf.stack(sampled_words,
                                     axis=1)  # [batch_size, max_dec_steps]

        vocab_scores = tf.stack(vocab_scores,
                                axis=1)  # [batch_size, max_dec_steps, vocab]
        # calculating loss
        self._loss = None
        if mode_gen in (
                'ce_train',
                'loss',
        ):
            xent = CE_loss(vocab_scores, answer_batch,
                           loss_weights)  # [batch_size]
            if mode_gen == 'loss':
                xent *= self.placeholders.reward  # multiply with rewards
            self._loss = tf.reduce_mean(xent)
            # Calculate coverage loss from the attention distributions
            if options.use_coverage:
                with tf.variable_scope('coverage_loss'):
                    self._coverage_loss = _coverage_loss(
                        attn_dists, loss_weights)
                self._loss = self._loss + options.cov_loss_wt * self._coverage_loss

        # accuracy is calculated only under 'ce_train', where true answer is given
        if mode_gen == 'ce_train':
            accuracy = _mask_and_accuracy(vocab_scores, answer_batch,
                                          loss_weights)
            return accuracy, self._loss, sampled_words
        else:
            return None, self._loss, sampled_words
Example #48
def create_selection_weights(name,
                             type_,
                             shape,
                             inv_t=1,
                             initializer=tf.zeros_initializer(),
                             regularizer=None,
                             names=None):
    """Create a SelectionWeights tuple.

  Args:
    name: Name for the underlying variable containing the unnormalized weights.
    type_: "softmax" or "sigmoid" or ("softmax_topk", k) where k is an int.
    shape: Shape for the variable.
    inv_t: Inverse of the temperature to use in normalization.
    initializer: Initializer for the variable, passed to `tf.get_variable`.
    regularizer: Regularizer for the variable. A callable which accepts
      `tempered_var` and `normalized`.
    names: Name of each selection.

  Returns:
    The created SelectionWeights tuple.

  Raises:
    ValueError: if type_ is not in the supported range.
  """
    var = tf.get_variable(name, shape, initializer=initializer)

    if callable(inv_t):
        inv_t = inv_t(var)
    if inv_t == 1:
        tempered_var = var
    else:
        tempered_var = var * inv_t

    if type_ == "softmax":
        weights = tf.nn.softmax(tempered_var)
    elif type_ == "sigmoid":
        weights = tf.nn.sigmoid(tempered_var)
    elif isinstance(type_, (list, tuple)) and type_[0] == "softmax_topk":
        assert len(shape) == 1
        # TODO(rshin): Change this to select without replacement?
        selection = tf.multinomial(tf.expand_dims(var, axis=0), type_[1])  # draw k samples
        selection = tf.squeeze(selection, axis=0)  # [k] selected classes.
        to_run = tf.one_hot(selection, shape[0])  # [k x nmodules] one-hot.
        # [nmodules], 0=not run, 1=run.
        to_run = tf.minimum(tf.reduce_sum(to_run, axis=0), 1)
        weights = tf.nn.softmax(tempered_var - 1e9 * (1.0 - to_run))
    else:
        raise ValueError("Unknown type: %s" % type_)

    if regularizer is not None:
        loss = regularizer(tempered_var, weights)
        if loss is not None:
            tf.add_to_collection(tf.GraphKeys.REGULARIZATION_LOSSES, loss)

    if names is not None:
        tf.get_collection_ref("selection_weight_names/" + var.name).extend(
            names.flatten() if isinstance(names, np.ndarray) else names)
        tf.add_to_collection("selection_weight_names_tensor/" + var.name,
                             tf.constant(names))

    return SelectionWeights(var=var,
                            tempered_var=tempered_var,
                            inv_t=inv_t,
                            normalized=weights)
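
A minimal usage sketch (hypothetical names), selecting softly among eight candidate modules:

sel = create_selection_weights("module_choice", "softmax", shape=[8], inv_t=5.0)
# sel.normalized is an 8-way softmax over the temperature-sharpened logits in sel.var
mixture_weights = sel.normalized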
Example #49
    def __init__(self, lr, o_size_h, o_size_w, a_size, h_size, epsilon, beta,
                 max_step):
        """
        Creates Discrete Control Actor-Critic model for use with visual observations (images).
        :param o_size_h: Observation height.
        :param o_size_w: Observation width.
        :param a_size: Action-space size.
        :param h_size: Hidden layer size.
        """
        self.observation_in = tf.placeholder(
            shape=[None, o_size_h, o_size_w, 1],
            dtype=tf.float32,
            name='observation_0')
        self.conv1 = tf.layers.conv2d(self.observation_in,
                                      32,
                                      kernel_size=[3, 3],
                                      strides=[2, 2],
                                      use_bias=False,
                                      activation=tf.nn.elu)
        self.conv2 = tf.layers.conv2d(self.conv1,
                                      64,
                                      kernel_size=[3, 3],
                                      strides=[2, 2],
                                      use_bias=False,
                                      activation=tf.nn.elu)
        self.batch_size = tf.placeholder(shape=None, dtype=tf.int32)
        hidden = tf.layers.dense(c_layers.flatten(self.conv2),
                                 h_size,
                                 use_bias=False,
                                 activation=tf.nn.elu)
        self.policy = tf.layers.dense(
            hidden,
            a_size,
            activation=None,
            use_bias=False,
            kernel_initializer=c_layers.variance_scaling_initializer(
                factor=0.1))
        self.probs = tf.nn.softmax(self.policy)
        self.action = tf.multinomial(self.policy, 1)
        self.output = tf.identity(self.action, name='action')
        self.value = tf.layers.dense(hidden,
                                     1,
                                     activation=None,
                                     use_bias=False)

        self.entropy = -tf.reduce_sum(self.probs * tf.log(self.probs + 1e-10),
                                      axis=1)

        self.action_holder = tf.placeholder(shape=[None], dtype=tf.int32)
        self.selected_actions = c_layers.one_hot_encoding(
            self.action_holder, a_size)
        self.old_probs = tf.placeholder(shape=[None, a_size],
                                        dtype=tf.float32,
                                        name='old_probabilities')
        self.responsible_probs = tf.reduce_sum(self.probs *
                                               self.selected_actions,
                                               axis=1)
        self.old_responsible_probs = tf.reduce_sum(self.old_probs *
                                                   self.selected_actions,
                                                   axis=1)

        PPOModel.__init__(self, self.responsible_probs,
                          self.old_responsible_probs, self.value, self.entropy,
                          beta, epsilon, lr, max_step)
Example #50
def select_sample_input():
    # read the logits for the current time step and sample the next decoder input
    current_logits = logits.read(ts)
    decoder_input = tf.to_int32(tf.multinomial(current_logits, 1))
    decoder_input = tf.stop_gradient(decoder_input)
    return tf.squeeze(decoder_input, [1])
Example #51
    def make_data_tensor(self, train=True):
        if train:
            folders = self.metatrain_character_folders
            # number of tasks, not number of meta-iterations (divide by the meta-batch size to get meta-iterations)
            num_total_batches = 200000
        else:
            folders = self.metaval_character_folders
            num_total_batches = 600

        # make list of files
        print('Generating filenames')
        all_filenames = []
        for _ in range(num_total_batches):
            sampled_character_folders = random.sample(folders,
                                                      self.num_classes)
            random.shuffle(sampled_character_folders)
            labels_and_images = get_images(
                sampled_character_folders,
                range(self.num_classes),
                nb_samples=self.num_samples_per_class,
                shuffle=False)
            # make sure the above isn't randomized order
            labels = [li[0] for li in labels_and_images]
            filenames = [li[1] for li in labels_and_images]
            all_filenames.extend(filenames)

        # make queue for tensorflow to read from
        filename_queue = tf.train.string_input_producer(
            tf.convert_to_tensor(all_filenames), shuffle=False)
        print('Generating image processing ops')
        image_reader = tf.WholeFileReader()
        _, image_file = image_reader.read(filename_queue)
        if FLAGS.datasource == 'miniimagenet':
            image = tf.image.decode_jpeg(image_file, channels=3)
            image.set_shape((self.img_size[0], self.img_size[1], 3))
            image = tf.reshape(image, [self.dim_input])
            image = tf.cast(image, tf.float32) / 255.0
        else:
            image = tf.image.decode_png(image_file)
            image.set_shape((self.img_size[0], self.img_size[1], 1))
            image = tf.reshape(image, [self.dim_input])
            image = tf.cast(image, tf.float32) / 255.0
            image = 1.0 - image  # invert
        num_preprocess_threads = 1  # TODO - enable this to be set to >1
        min_queue_examples = 256
        examples_per_batch = self.num_classes * self.num_samples_per_class
        batch_image_size = self.batch_size * examples_per_batch
        print('Batching images')
        images = tf.train.batch(
            [image],
            batch_size=batch_image_size,
            num_threads=num_preprocess_threads,
            capacity=min_queue_examples + 3 * batch_image_size,
        )
        all_image_batches, all_label_batches = [], []
        print('Manipulating image data to be right shape')
        for i in range(self.batch_size):
            image_batch = images[i * examples_per_batch:(i + 1) *
                                 examples_per_batch]

            if FLAGS.datasource == 'omniglot':
                # omniglot augments the dataset by rotating digits to create new classes
                # get rotation per class (e.g. 0,1,2,0,0 if there are 5 classes)
                rotations = tf.multinomial(tf.log([[1., 1., 1., 1.]]),
                                           self.num_classes)
            label_batch = tf.convert_to_tensor(labels)
            new_list, new_label_list = [], []
            for k in range(self.num_samples_per_class):
                class_idxs = tf.range(0, self.num_classes)
                class_idxs = tf.random_shuffle(class_idxs)

                true_idxs = class_idxs * self.num_samples_per_class + k
                new_list.append(tf.gather(image_batch, true_idxs))
                if FLAGS.datasource == 'omniglot':  # and FLAGS.train:
                    new_list[-1] = tf.stack([
                        tf.reshape(
                            tf.image.rot90(tf.reshape(
                                new_list[-1][ind],
                                [self.img_size[0], self.img_size[1], 1]),
                                           k=tf.cast(
                                               rotations[0, class_idxs[ind]],
                                               tf.int32)), (self.dim_input, ))
                        for ind in range(self.num_classes)
                    ])
                new_label_list.append(tf.gather(label_batch, true_idxs))
            new_list = tf.concat(
                new_list, 0
            )  # has shape [self.num_classes*self.num_samples_per_class, self.dim_input]
            new_label_list = tf.concat(new_label_list, 0)
            all_image_batches.append(new_list)
            all_label_batches.append(new_label_list)
        all_image_batches = tf.stack(all_image_batches)
        all_label_batches = tf.stack(all_label_batches)
        all_label_batches = tf.one_hot(all_label_batches, self.num_classes)
        return all_image_batches, all_label_batches
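
# The pipeline above only yields data once TensorFlow's queue runners are started,
# because tf.train.string_input_producer and tf.train.batch are queue-based. A toy,
# self-contained sketch of that driving pattern (the range producer and the fake
# "example" tensor are stand-ins, not the snippet's real pipeline):
import tensorflow as tf

index = tf.train.range_input_producer(limit=10, shuffle=False).dequeue()
example = tf.cast(index, tf.float32) * 2.0
batch = tf.train.batch([example], batch_size=4, num_threads=1, capacity=32)

with tf.Session() as sess:
    sess.run(tf.local_variables_initializer())
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)  # feed the queues
    print(sess.run(batch))  # [0. 2. 4. 6.]
    coord.request_stop()
    coord.join(threads)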
Example #52
0
  def __init__(
    self
  ):
    self.num_layers = 4
    self.num_branches = 6
    self.lstm_size = 32
    self.num_blocks_per_branch = 6

    self.l2_reg = 1e-4

    self.lstm_weight = []
    for layer_id in range(self.num_layers):
      with tf.variable_scope("layer_{}".format(layer_id)):
        w = tf.get_variable("w", [2 * self.lstm_size, 4 * self.lstm_size])
        self.lstm_weight.append(w)
    
    self.num_configs = (2 ** self.num_blocks_per_branch) - 1

    with tf.variable_scope("embedding"):
      self.embed_graph = tf.get_variable("embed_graph", [1, self.lstm_size])
      self.embed_weight = tf.get_variable("weight", [
        self.num_blocks_per_branch, 
        self.lstm_size
      ])
    
    with tf.variable_scope("softmax"):
      self.softmax_weight = tf.get_variable("weight", [
        self.lstm_size,
        self.num_blocks_per_branch
      ])
    
    with tf.variable_scope("critic"):
      self.critic_weight = tf.get_variable("weight", [self.lstm_size, 1])

    arc_seq = []
    sample_log_probs = []
    all_h = []

    inputs = self.embed_graph
    prev_channel = [
      tf.zeros([1, self.lstm_size], dtype=tf.float32)
        for _ in range(self.num_layers)
    ]
    prev_height = [
      tf.zeros([1, self.lstm_size], dtype=tf.float32)
        for _ in range(self.num_layers)
    ]

    for layer_id in range(self.num_layers):
      for branch_id in range(self.num_branches):
        next_channel, next_height = stack_lstm(
          inputs, 
          prev_channel, 
          prev_height, 
          self.lstm_weight
        )
        all_h.append(tf.stop_gradient(next_height[-1]))

        logits = tf.matmul(next_height[-1], self.softmax_weight)
        logits = 1.10 * tf.tanh(logits)

        config_id = tf.multinomial(logits, 1)
        config_id = tf.to_int32(config_id)
        config_id = tf.reshape(config_id, [1])
        arc_seq.append(config_id)
        log_prob = tf.nn.sparse_softmax_cross_entropy_with_logits(
          logits=logits,
          labels=config_id
        )
        sample_log_probs.append(log_prob)

        inputs = tf.nn.embedding_lookup(self.embed_weight, config_id)
    
    self.sample_arc = tf.concat(arc_seq, axis=0)
    
    self.sample_log_probs = tf.concat(sample_log_probs, axis=0)
    self.ppl = tf.exp(
      tf.reduce_sum(self.sample_log_probs) /
      tf.to_float(self.num_layers * self.num_branches)
    )
    self.all_h = all_h
Example #53
0
def multinomial_sample(x, vocab_size, temperature):
    """Multinomial sampling from a n-dimensional tensor."""
    samples = tf.multinomial(tf.reshape(x, [-1, vocab_size]) / temperature, 1)
    reshaped_samples = tf.reshape(samples, tf.shape(x)[:-1])
    return tf.to_int32(reshaped_samples)
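
# A quick, hedged usage sketch of the helper above with made-up shapes
# (batch of 2, sequence length 3, vocabulary of 5). Lower temperatures
# sharpen the distribution toward the arg-max token:
import tensorflow as tf

logits = tf.random_normal([2, 3, 5])  # [batch, time, vocab]
tokens = multinomial_sample(logits, vocab_size=5, temperature=0.7)

with tf.Session() as sess:
    print(sess.run(tokens).shape)  # (2, 3), int32 token ids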
Example #54
0
      def loop_fn(time, cell_output, cell_state, loop_state):
        if cell_output is None:  # time == 0
          next_cell_state = encoder_states
          next_input = tf.tile(go_embedding, to_T([N, 1]))
        else:  # time > 0
          next_cell_state = cell_state

          # compute the attention map over the input sequence
          # att_raw has shape [T, N, 1]
          att_raw = tf.reduce_sum(
            tf.tanh(tf.nn.xw_plus_b(cell_output, W_a, b_a) +
                self.encoder_h_transformed) * v,
            axis=2, keep_dims=True)
          # softmax along the first dimension (T) over the unfinished examples
          # att has shape [T, N, 1]
          att = tf.nn.softmax(att_raw, dim=0)*self.seq_not_finished
          att = att / tf.reduce_sum(att + 1e-10, axis=0, keep_dims=True)
          # d2 has shape [N, lstm_dim]
          d2 = tf.reduce_sum(att*self.encoder_outputs, axis=0)

          # token_scores has shape [N, num_vocab]
          token_scores = tf.nn.xw_plus_b(
            tf.concat([cell_output, d2], axis=1),
            W_y, b_y)

          decoding_state = loop_state[2]
          # token_validity has shape [N, num_vocab]
          token_validity = _get_valid_tokens(decoding_state, self.W, self.b)
          token_validity.set_shape([None, self.decoder_num_vocab])
          if use_gt_layout is not None:
            # when there's ground-truth layout, do not re-normalize prob
            # and treat all tokens as valid
            token_validity = tf.logical_or(token_validity, use_gt_layout)

          validity_mult = tf.cast(token_validity, tf.float32)

          # predict the next token (behavior depending on parameters)
          if sampling:
            token_scores_valid = token_scores - (1-validity_mult) * 50
            # TODO:debug
            sampled_token = tf.cast(tf.reshape(
                tf.multinomial(token_scores_valid/self.temperature, 1), [-1]), tf.int32)

            # make sure that the predictions are ALWAYS valid 
            # (it can be invalid with very small prob)
            # If not, fall back to the highest-scoring valid token
            # sampled_mask has shape [N, num_vocab]
            sampled_mask = tf.equal(mask_range, tf.reshape(sampled_token, [-1, 1]))
            is_sampled_valid = tf.reduce_any(
              tf.logical_and(sampled_mask, token_validity),
              axis=1)

            # Fall back to max score (no sampling)
            min_score = tf.reduce_min(token_scores)
            token_scores_valid = tf.where(token_validity, token_scores,
                           tf.ones_like(token_scores)*(min_score-1))
            max_score_token = tf.cast(tf.argmax(token_scores_valid, 1), tf.int32)
            predicted_token = tf.where(is_sampled_valid, sampled_token, max_score_token)
          else:
            min_score = tf.reduce_min(token_scores)
            token_scores_valid = tf.where(token_validity, token_scores,
                           tf.ones_like(token_scores)*(min_score-1))
            # predicted_token has shape [N]
            predicted_token = tf.cast(tf.argmax(token_scores_valid, 1), tf.int32)
          if use_gt_layout is not None:
            predicted_token = (gt_layout_batch[time-1] * gt_layout_mult
                     + predicted_token * pred_layout_mult)

          # a robust version of softmax
          # all_token_probs has shape [N, num_vocab]
          all_token_probs = tf.nn.softmax(token_scores) * validity_mult
          # tf.check_numerics(all_token_probs, 'NaN/Inf before div')
          all_token_probs = all_token_probs / tf.reduce_sum(all_token_probs + 1e-10, axis=1, keep_dims=True)
          # tf.check_numerics(all_token_probs, 'NaN/Inf after div')

          # mask has shape [N, num_vocab]
          mask = tf.equal(mask_range, tf.reshape(predicted_token, [-1, 1]))
          # token_prob has shape [N], the probability of the predicted token
          # although token_prob is not needed for predicting the next token
          # it is needed in output (for policy gradient training)
          # [N, num_vocab]
          token_prob = tf.reduce_sum(all_token_probs * tf.cast(mask, tf.float32), axis=1)
          # tf.assert_positive(token_prob)
          neg_entropy = tf.reduce_sum(
            all_token_probs * tf.log(all_token_probs + (1-validity_mult) + 1e-10),
            axis=1)

          # update states
          updated_decoding_state = _update_decoding_state(
            decoding_state, predicted_token, self.P)

          # the prediction comes from the cell output of the previous
          # timestep (t-1); feed it as the input to timestep t
          next_input = tf.nn.embedding_lookup(embedding_mat, predicted_token)

        elements_finished = tf.greater_equal(time, T_max)

        # loop_state is a 5-tuple, representing
        #   1) the predicted_tokens
        #   2) the prob of predicted_tokens
        #   3) the decoding state (used for validity)
        #   4) the negative entropy of policy (accumulated across timesteps)
        #   5) the attention
        if loop_state is None:  # time == 0
          # Write the predicted token into the output
          predicted_token_array = tf.TensorArray(dtype=tf.int32, size=T_max,
            infer_shape=False)
          token_prob_array = tf.TensorArray(dtype=tf.float32, size=T_max,
            infer_shape=False)
          init_decoding_state = tf.tile(to_T([[0, 0, T_max]], dtype=tf.int32), to_T([N, 1]))
          att_array = tf.TensorArray(dtype=tf.float32, size=T_max,
            infer_shape=False)
          next_loop_state = (predicted_token_array,
                   token_prob_array,
                   init_decoding_state,
                   tf.zeros(to_T([N]), dtype=tf.float32),
                   att_array)
        else:  # time > 0
          t_write = time-1
          next_loop_state = (loop_state[0].write(t_write, predicted_token),
                   loop_state[1].write(t_write, token_prob),
                   updated_decoding_state,
                   loop_state[3] + neg_entropy,
                   loop_state[4].write(t_write, att))
        return (elements_finished, next_input, next_cell_state, cell_output,
            next_loop_state)
Example #55
0
 def random_category(self, size, dtype):
     prior = tf.ones(tf.stack((tf.shape(self.gan.inputs.x)[0], size)))*1./size
     dist = tf.log(prior + TINY)
     sample=tf.multinomial(dist, num_samples=1)[:, 0]
     return tf.one_hot(sample, size, dtype=dtype)
Example #56
0
 def inner_loop(i, alive_seq):
     logit = symbols_to_logits_fn(alive_seq)[0]
     new_samples = tf.multinomial(logit, 1)
     new_samples = tf.to_int32(new_samples)
     alive_seq = tf.concat([alive_seq, new_samples], 1)
     return (i + 1, alive_seq)
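
# A loop body like inner_loop is normally driven by tf.while_loop with a shape
# invariant that leaves the growing time dimension of alive_seq unconstrained.
# A minimal, self-contained sketch; the uniform-logits symbols_to_logits_fn and
# the batch/vocab sizes below are stand-ins, not the real model:
import tensorflow as tf

batch_size, vocab_size, max_len = 2, 10, 5

def symbols_to_logits_fn(seq):
    # Stand-in model: uniform logits over the vocabulary for every sequence.
    return [tf.zeros([tf.shape(seq)[0], vocab_size])]

def sample_step(i, alive_seq):  # same logic as inner_loop above
    logit = symbols_to_logits_fn(alive_seq)[0]
    new_samples = tf.to_int32(tf.multinomial(logit, 1))
    return i + 1, tf.concat([alive_seq, new_samples], 1)

initial_seq = tf.zeros([batch_size, 1], dtype=tf.int32)  # e.g. <GO> tokens
_, sampled = tf.while_loop(
    lambda i, seq: i < max_len,
    sample_step,
    loop_vars=[tf.constant(0), initial_seq],
    shape_invariants=[tf.TensorShape([]), tf.TensorShape([batch_size, None])])

with tf.Session() as sess:
    print(sess.run(sampled).shape)  # (2, 6): the initial column plus max_len samples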
Example #57
0
  def _build_sampler(self):
    """Build the sampler ops and the log_prob ops."""

    arc_seq = []
    sample_log_probs = []
    sample_entropy = []
    all_h = []
    all_h_w = []

    # sampler ops
    inputs = self.g_emb
    prev_c, prev_h = [], []
    for _ in range(self.lstm_num_layers):
      prev_c.append(tf.zeros([1, self.lstm_size], dtype=tf.float32))
      prev_h.append(tf.zeros([1, self.lstm_size], dtype=tf.float32))

    # used = tf.zeros([self.rhn_depth, 2], dtype=tf.int32)
    for layer_id in range(self.rhn_depth):
      next_c, next_h = stack_lstm(inputs, prev_c, prev_h, self.w_lstm)
      prev_c, prev_h = next_c, next_h
      all_h.append(next_h[-1])
      all_h_w.append(tf.matmul(next_h[-1], self.attn_w_1))

      if layer_id > 0:
        query = tf.matmul(next_h[-1], self.attn_w_2)
        query = query + tf.concat(all_h_w[:-1], axis=0)
        query = tf.tanh(query)
        logits = tf.matmul(query, self.attn_v)
        logits = tf.reshape(logits, [1, layer_id])

        if self.temperature is not None:
          logits /= self.temperature
        if self.tanh_constant is not None:
          logits = self.tanh_constant * tf.tanh(logits)
        diff = tf.to_float(layer_id - tf.range(0, layer_id)) ** 2
        logits -= tf.reshape(diff, [1, layer_id]) / 6.0

        skip_index = tf.multinomial(logits, 1)
        skip_index = tf.to_int32(skip_index)
        skip_index = tf.reshape(skip_index, [1])
        arc_seq.append(skip_index)

        log_prob = tf.nn.sparse_softmax_cross_entropy_with_logits(
          logits=logits, labels=skip_index)
        sample_log_probs.append(log_prob)

        entropy = log_prob * tf.exp(-log_prob)
        sample_entropy.append(tf.stop_gradient(entropy))

        inputs = tf.nn.embedding_lookup(
          tf.concat(all_h[:-1], axis=0), skip_index)
        inputs /= (0.1 + tf.to_float(layer_id - skip_index))
      else:
        inputs = self.g_emb

      next_c, next_h = stack_lstm(inputs, prev_c, prev_h, self.w_lstm)
      prev_c, prev_h = next_c, next_h
      logits = tf.matmul(next_h[-1], self.w_soft)
      if self.temperature is not None:
        logits /= self.temperature
      if self.tanh_constant is not None:
        logits = self.tanh_constant * tf.tanh(logits)
      func = tf.multinomial(logits, 1)
      func = tf.to_int32(func)
      func = tf.reshape(func, [1])
      arc_seq.append(func)
      log_prob = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=logits, labels=func)
      sample_log_probs.append(log_prob)
      entropy = log_prob * tf.exp(-log_prob)
      sample_entropy.append(tf.stop_gradient(entropy))
      inputs = tf.nn.embedding_lookup(self.w_emb, func)

    arc_seq = tf.concat(arc_seq, axis=0)
    self.sample_arc = arc_seq

    self.sample_log_probs = tf.concat(sample_log_probs, axis=0)
    self.ppl = tf.exp(tf.reduce_mean(self.sample_log_probs))

    sample_entropy = tf.concat(sample_entropy, axis=0)
    self.sample_entropy = tf.reduce_sum(sample_entropy)

    self.all_h = all_h
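
# The log_prob collected above is the sparse softmax cross-entropy of the sampled
# index, i.e. -log p(sample), which is what REINFORCE-style updates scale by a
# reward. A hedged, self-contained sketch of that idea with a toy parameter and
# made-up reward/baseline values; it is not this repository's training code:
import tensorflow as tf

w = tf.Variable([[1.0, 0.0, -1.0]], name="toy_logits")      # toy trainable logits
sample = tf.reshape(tf.to_int32(tf.multinomial(w, 1)), [1])
log_prob = tf.nn.sparse_softmax_cross_entropy_with_logits(  # = -log p(sample)
    logits=w, labels=sample)

reward, baseline = 0.8, 0.5                                  # assumed scalars
loss = tf.reduce_sum(log_prob) * (reward - baseline)         # REINFORCE surrogate
train_op = tf.train.AdamOptimizer(1e-3).minimize(loss)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    sess.run(train_op)  # raises p(sample) when reward > baseline, lowers it otherwise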
Example #58
0
def image_augmentations(image,
                        data_augmentations,
                        model_input_image_size,
                        label=None):
    """Coordinating image augmentations for both image and heatmap."""
    im_size = [int(x) for x in image.get_shape()]
    im_size_check = np.any(
        np.less_equal(model_input_image_size[:2], im_size[:2]))
    if data_augmentations is not None:
        # Pixel/image-level augmentations
        if 'singleton' in data_augmentations:
            image = tf.expand_dims(image, axis=-1)
            print 'Adding singleton dimension to image.'
        if 'singleton_label' in data_augmentations:
            label = tf.expand_dims(label, axis=-1)
            print 'Adding singleton dimension to label.'
        if 'bsds_crop' in data_augmentations and im_size_check:
            assert len(image.get_shape()) == 3, '4D not implemented yet.'
            # intermediate_size = [171, 256, 3]
            # intermediate_size = [256, 384, 3]
            intermediate_size = [324, 484, 3]
            image = tf.image.resize_image_with_crop_or_pad(
                image, intermediate_size[0], intermediate_size[1])
            label = tf.image.resize_image_with_crop_or_pad(
                label, intermediate_size[0], intermediate_size[1])
            print 'Applying BSDS crop.'
        if 'uint8_rescale' in data_augmentations:
            image = tf.cast(image, tf.float32) / 255.
            print 'Applying uint8 rescale to the image.'
        if 'uint8_rescale_label' in data_augmentations:
            label = tf.cast(label, tf.float32) / 255.
            print 'Applying uint8 rescale to the label.'
        if 'uint8_rescale_-1_1' in data_augmentations:
            image = 2 * (tf.cast(image, tf.float32) / 255.) - 1
            print 'Applying uint8 rescale.'
        if 'image_to_bgr' in data_augmentations:
            image = tf.stack([image[:, :, 2], image[:, :, 1], image[:, :, 0]],
                             axis=-1)
        if 'pascal_normalize' in data_augmentations:
            image = image - [123.68, 116.78, 103.94]
        if 'random_contrast' in data_augmentations:
            assert len(image.get_shape()) == 3, '4D not implemented yet.'
            image = tf.image.random_contrast(image, lower=0.2, upper=1.8)
            print 'Applying random contrast.'
        if 'random_brightness' in data_augmentations:
            assert len(image.get_shape()) == 3, '4D not implemented yet.'
            image = tf.image.random_brightness(image, max_delta=63.)
            print 'Applying random brightness.'
        if 'grayscale' in data_augmentations and im_size_check:
            # image = tf.image.rgb_to_grayscale(image)
            image = tf.expand_dims(image[:, :, 0], axis=-1)  # ABOVE INSTEAD?
            print 'Converting to grayscale.'
        # Affine augmentations
        if 'rotate' in data_augmentations and im_size_check:
            max_theta = 22.
            angle_rad = (max_theta / 180.) * math.pi
            angles = tf.random_uniform([], -angle_rad, angle_rad)
            transform = tf.contrib.image.angles_to_projective_transforms(
                angles, im_size[0], im_size[1])
            image = tf.contrib.image.transform(
                image,
                tf.contrib.image.compose_transforms(transform),
                interpolation='BILINEAR')  # or 'NEAREST'
            print 'Applying random rotate.'
        if 'rotate_image_label' in data_augmentations and im_size_check:
            max_theta = 30.
            angle_rad = (max_theta / 180.) * math.pi
            angles = tf.random_uniform([], -angle_rad, angle_rad)
            transform = tf.contrib.image.angles_to_projective_transforms(
                angles, im_size[0], im_size[1])
            image = tf.contrib.image.transform(
                image,
                tf.contrib.image.compose_transforms(transform),
                interpolation='BILINEAR')  # or 'NEAREST'
            label = tf.contrib.image.transform(
                label,
                tf.contrib.image.compose_transforms(transform),
                interpolation='BILINEAR')  # or 'NEAREST'
            print 'Applying random rotate.'
        if 'random_scale_crop_image_label' in data_augmentations\
                and im_size_check:
            scale_choices = tf.convert_to_tensor([1., 1.02, 1.04, 1.06, 1.08])
            samples = tf.multinomial(tf.log([tf.ones_like(scale_choices)]), 1)
            image_shape = image.get_shape().as_list()
            scale = scale_choices[tf.cast(samples[0][0], tf.int32)]
            scale_tf = tf.cast(
                tf.round(
                    np.asarray(model_input_image_size[:2]).astype(np.float32) *
                    scale), tf.int32)
            combined = tf.concat([image, label], axis=-1)
            combo_shape = combined.get_shape().as_list()
            combined_crop = tf.random_crop(
                combined, tf.concat([scale_tf, [combo_shape[-1]]], 0))
            combined_resize = tf.squeeze(tf.image.resize_bicubic(
                tf.expand_dims(combined_crop, axis=0),
                model_input_image_size[:2],
                align_corners=True),
                                         axis=0)
            image = combined_resize[:, :, :image_shape[-1]]
            label = combined_resize[:, :, image_shape[-1]:]
            image.set_shape(model_input_image_size)
            label.set_shape(model_input_image_size[:2] +
                            [combo_shape[-1] - model_input_image_size[-1]])
        if 'rc_res' in data_augmentations and im_size_check:
            image = random_crop(image, model_input_image_size)
            if len(model_input_image_size) > 2:
                model_input_image_size = model_input_image_size[:2]
            ms = [x // 2 for x in model_input_image_size]
            image = resize_image_label(im=image,
                                       model_input_image_size=ms,
                                       f='bicubic')
            print 'Applying random crop and resize.'
        if 'cc_res' in data_augmentations and im_size_check:
            image = center_crop(image, model_input_image_size)
            if len(model_input_image_size) > 2:
                model_input_image_size = model_input_image_size[:2]
            ms = [x // 2 for x in model_input_image_size]
            image = resize_image_label(im=image,
                                       model_input_image_size=ms,
                                       f='bicubic')
            print 'Applying center crop and resize.'
        if 'random_crop' in data_augmentations and im_size_check:
            image = random_crop(image, model_input_image_size)
            print 'Applying random crop.'
        if 'center_crop' in data_augmentations and im_size_check:
            image = center_crop(image, model_input_image_size)
            print 'Applying center crop.'
        if 'random_crop_image_label' in data_augmentations and im_size_check:
            assert len(image.get_shape()) == 3, '4D not implemented yet.'
            image, label = crop_image_label(image=image,
                                            label=label,
                                            size=model_input_image_size,
                                            crop='random')
        if 'center_crop_image_label' in data_augmentations and im_size_check:
            assert len(image.get_shape()) == 3, '4D not implemented yet.'
            image, label = crop_image_label(image=image,
                                            label=label,
                                            size=model_input_image_size,
                                            crop='center')
        if 'resize' in data_augmentations and im_size_check:
            if len(model_input_image_size) > 2:
                model_input_image_size = model_input_image_size[:2]
            image = resize_image_label(
                im=image,
                model_input_image_size=model_input_image_size,
                f='bicubic')
            print 'Applying bicubic resize.'
        if 'jk_resize' in data_augmentations and im_size_check:
            if len(model_input_image_size) > 2:
                model_input_image_size = model_input_image_size[:2]
            image = tf.image.resize_image_with_crop_or_pad(
                image, model_input_image_size[0], model_input_image_size[1])
            print 'Applying crop-or-pad resize.'
        if 'resize_and_crop' in data_augmentations and im_size_check:
            model_input_image_size_1 = np.asarray(
                model_input_image_size[:2]) + 28
            image = resize_image_label(
                im=image,
                model_input_image_size=model_input_image_size_1,
                f='area')
            image = center_crop(image, model_input_image_size)
            print 'Applying area resize.'
        if 'resize_nn' in data_augmentations and im_size_check:
            assert len(image.get_shape()) == 3, '4D not implemented yet.'
            if len(model_input_image_size) > 2:
                model_input_image_size = model_input_image_size[:2]
            image = resize_image_label(
                im=image,
                model_input_image_size=model_input_image_size,
                f='nearest')
            print 'Applying nearest resize.'
        if 'resize_image_label' in data_augmentations and im_size_check:
            assert len(image.get_shape()) == 3, '4D not implemented yet.'
            if len(model_input_image_size) > 2:
                model_input_image_size = model_input_image_size[:2]
            image = resize_image_label(
                im=image,
                model_input_image_size=model_input_image_size,
                f='bicubic')
            label = resize_image_label(
                im=label,
                model_input_image_size=model_input_image_size,
                f='bicubic')
            print 'Applying bicubic resize.'
        elif 'resize_nn_image_label' in data_augmentations and im_size_check:
            assert len(image.get_shape()) == 3, '4D not implemented yet.'
            if len(model_input_image_size) > 2:
                model_input_image_size = model_input_image_size[:2]
            image = resize_image_label(
                im=image,
                model_input_image_size=model_input_image_size,
                f='nearest')
            label = resize_image_label(
                im=label,
                model_input_image_size=model_input_image_size,
                f='nearest')
            print 'Applying nearest resize.'
        else:
            pass
        if 'left_right' in data_augmentations:
            image = image_flip(image, direction='left_right')
            print 'Applying random flip left-right.'
        if 'up_down' in data_augmentations:
            image = image_flip(image, direction='up_down')
            print 'Applying random flip up-down.'
        if 'lr_flip_image_label' in data_augmentations:
            assert len(image.get_shape()) == 3, '4D not implemented yet.'
            image, label = lr_flip_image_label(image, label)
        if 'ud_flip_image_label' in data_augmentations:
            assert len(image.get_shape()) == 3, '4D not implemented yet.'
            image, label = ud_flip_image_label(image, label)
        if 'gaussian_noise' in data_augmentations:
            im_shape = image.get_shape().as_list()
            assert len(im_shape) == 3, '4D not implemented yet.'
            sigma = 1. / 10.
            mu = 0.
            image = image + tf.random_normal(im_shape, mean=mu, stddev=sigma)
            print 'Applying gaussian noise.'
        if 'gaussian_noise_small' in data_augmentations:
            im_shape = image.get_shape().as_list()
            assert len(im_shape) == 3, '4D not implemented yet.'
            sigma = 1. / 20.
            mu = 0.
            image = image + tf.random_normal(im_shape, mean=mu, stddev=sigma)
            print 'Applying gaussian noise.'
        if 'calculate_rate_time_crop' in data_augmentations:
            im_shape = image.get_shape().as_list()
            minval = im_shape[0] // 3
            time_crop = tf.random_uniform([],
                                          minval=minval,
                                          maxval=im_shape[0],
                                          dtype=tf.int32)

            # For now always pull from the beginning
            indices = tf.range(0, time_crop, dtype=tf.int32)
            selected_image = tf.gather(image, indices)
            padded_image = tf.zeros([im_shape[0] - time_crop] + im_shape[1:],
                                    dtype=selected_image.dtype)

            # Randomly concatenate pad to front or back
            image = tf.cond(pred=tf.greater(
                tf.random_uniform([], minval=0, maxval=1, dtype=tf.float32),
                0.5),
                            true_fn=lambda: tf.concat(
                                [selected_image, padded_image], axis=0),
                            false_fn=lambda: tf.concat(
                                [padded_image, selected_image], axis=0))
            image.set_shape(im_shape)

            # Convert label to rate
            label = label / im_shape[0]
        if 'calculate_rate' in data_augmentations:
            label = label / image.get_shape().as_list()[0]
            print 'Applying rate transformation.'
        if 'threshold' in data_augmentations:
            image = tf.cast(tf.greater(image, 0.1), tf.float32)
            print 'Applying threshold.'
        if 'nonzero_label' in data_augmentations:
            label = tf.cast(tf.greater(label, 0.2), tf.float32)
            print 'Applying threshold.'
        if 'zero_one' in data_augmentations:
            image = tf.minimum(tf.maximum(image, 0.), 1.)
            print 'Applying threshold.'
        if 'timestep_duplication' in data_augmentations:
            image = tf.stack([image for iid in range(7)])
            print 'Applying timestep duplication.'
        if 'per_image_standardization' in data_augmentations:
            image = tf.image.per_image_standardization(image)
            print 'Applying per-image zscore.'
        if 'flip_polarity' in data_augmentations:
            image = tf.abs(image - 1.)
        if 'NCHW' in data_augmentations:
            image = tf.transpose(image, (2, 0, 1))
    else:
        assert len(image.get_shape()) == 3, '4D not implemented yet.'
        image = tf.image.resize_image_with_crop_or_pad(
            image, model_input_image_size[0], model_input_image_size[1])
    return image, label
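
# The random scale selection above (a tf.multinomial draw over uniform
# log-probabilities, then indexing the choice tensor) can be isolated into a
# tiny sketch. The scale values are the ones listed in the function; the rest
# is illustrative:
import tensorflow as tf

scale_choices = tf.convert_to_tensor([1., 1.02, 1.04, 1.06, 1.08])
samples = tf.multinomial(tf.log([tf.ones_like(scale_choices)]), 1)  # uniform draw
scale = scale_choices[tf.cast(samples[0][0], tf.int32)]

with tf.Session() as sess:
    print(sess.run(scale))  # one of 1.0, 1.02, 1.04, 1.06, 1.08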
Example #59
0
n_outputs = 1

learning_rate = 0.01

initializer = tf.variance_scaling_initializer()

X = tf.placeholder(tf.float32, shape=[None, n_inputs])

hidden = tf.layers.dense(X,
                         n_hidden,
                         activation=tf.nn.elu,
                         kernel_initializer=initializer)
logits = tf.layers.dense(hidden, n_outputs)
outputs = tf.nn.sigmoid(logits)  # probability of action 0 (left)
p_left_and_right = tf.concat(axis=1, values=[outputs, 1 - outputs])
action = tf.multinomial(tf.log(p_left_and_right), num_samples=1)

y = 1. - tf.to_float(action)
cross_entropy = tf.nn.sigmoid_cross_entropy_with_logits(labels=y,
                                                        logits=logits)
optimizer = tf.train.AdamOptimizer(learning_rate)
grads_and_vars = optimizer.compute_gradients(cross_entropy)
gradients = [grad for grad, variable in grads_and_vars]
gradient_placeholders = []
grads_and_vars_feed = []
for grad, variable in grads_and_vars:
    gradient_placeholder = tf.placeholder(tf.float32, shape=grad.get_shape())
    gradient_placeholders.append(gradient_placeholder)
    grads_and_vars_feed.append((gradient_placeholder, variable))
training_op = optimizer.apply_gradients(grads_and_vars_feed)
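
# The gradient placeholders above are meant to receive step gradients (obtained
# with sess.run(gradients, ...)) after they have been weighted by credit-assigned
# rewards. One common weighting, sketched here as hypothetical numpy helpers that
# are not defined anywhere in the snippet, discounts each episode's rewards and
# normalizes across episodes:
import numpy as np

def discount_rewards(rewards, discount_rate):
    """Discounted cumulative sum of one episode's rewards."""
    discounted = np.zeros(len(rewards))
    cumulative = 0.0
    for step in reversed(range(len(rewards))):
        cumulative = rewards[step] + cumulative * discount_rate
        discounted[step] = cumulative
    return discounted

def discount_and_normalize_rewards(all_rewards, discount_rate):
    """Discount per episode, then normalize over all episodes."""
    all_discounted = [discount_rewards(r, discount_rate) for r in all_rewards]
    flat = np.concatenate(all_discounted)
    mean, std = flat.mean(), flat.std()
    return [(d - mean) / std for d in all_discounted]

# e.g. discount_rewards([10, 0, -50], 0.8) -> [-22., -40., -50.]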
Example #60
0
def ptb_producer(doc, que, ans, batch_size, vocab=100, name=None, config=None):
    """Iterate on the raw PTB data.

  This chunks up raw_data into batches of examples and returns Tensors that
  are drawn from these batches.

  Args:
    raw_data: one of the raw data outputs from ptb_raw_data.
    batch_size: int, the batch size.
    num_steps: int, the number of unrolls.
    name: the name of this operation (optional).

  Returns:
    A pair of Tensors, each shaped [batch_size, num_steps]. The second element
    of the tuple is the same data time-shifted to the right by one.

  Raises:
    tf.errors.InvalidArgumentError: if batch_size or num_steps are too high.
  """
    #print(ans)
    with tf.name_scope(name, "PTBProducer", [doc, que, ans]):
        doc_len = len(doc)
        vocab = config.vocab_size
        vans = []
        for e in ans:
            van = [0] * vocab
            van[e] = 1
            vans.append(van)
        #print(doc)
        d = len(doc[0])
        q = len(que[0])

        #print(d)

        epoch_size = doc_len // batch_size
        #print(epoch_size)
        #print(ans)

        doc = tf.convert_to_tensor(doc, name="documents", dtype=tf.int32)
        que = tf.convert_to_tensor(que, name="questions", dtype=tf.int32)
        vans = tf.convert_to_tensor(vans, name="vanswers", dtype=tf.int32)
        ans = tf.convert_to_tensor(ans, name="answers", dtype=tf.int32)

        #data_len = len(documents)
        # batch_len = len(documents[0])

        assertion = tf.assert_positive(
            epoch_size,
            message="epoch_size == 0, decrease batch_size or num_steps")
        with tf.control_dependencies([assertion]):
            epoch_size = tf.identity(epoch_size, name="epoch_size")

        i = tf.train.range_input_producer(epoch_size, shuffle=False).dequeue()

        #elems = tf.convert_to_tensor([1,2,3,5])
        batch_prob = []
        #for batch_number in range(batch_size):
        batch_prob.append([10.] * doc_len)

        samples = tf.multinomial(tf.log(batch_prob),
                                 batch_size)  # note log-prob
        print(tf.get_variable_scope().reuse == False)
        x = []
        y = []
        z = []
        zz = []
        for batch_number in range(batch_size):
            x.append(doc[tf.cast(samples[0][batch_number], tf.int32)])
            y.append(que[tf.cast(samples[0][batch_number], tf.int32)])
            z.append(vans[tf.cast(samples[0][batch_number], tf.int32)])
            zz.append(ans[tf.cast(samples[0][batch_number], tf.int32)])

        x = tf.convert_to_tensor(x, name="documents", dtype=tf.int32)
        y = tf.convert_to_tensor(y, name="questions", dtype=tf.int32)
        z = tf.convert_to_tensor(z, name="vanswers", dtype=tf.int32)
        zz = tf.convert_to_tensor(zz, name="answers", dtype=tf.int32)
        '''
    x= tf.slice(doc,[i*batch_size,0],[batch_size,d])
    y= tf.slice(que,[i*batch_size,0],[batch_size,q])
    z= tf.slice(vans,[i*batch_size,0],[batch_size,vocab])
    zz=tf.slice(ans,[i*batch_size],[batch_size])
    '''
        #print(i)
        #print(epoch_size)
        return x, y, z, zz, epoch_size