Example #1
def clip_by_norm(gvs, grad_norm_thresh, scope="grad_clip"):
    """
    Clip gradients by norm, optionally inside a name scope.

    Args:
      gvs: list of gradient variable tuples
      grad_norm_thresh: norm threshold to clip
      scope: scope for the clip operation
    """
    new_gvs = []
    if scope:
        with tf.name_scope(scope):
            #gvs = [(tf.clip_by_norm(gv[0], grad_norm_thresh), gv[1]) \
            #       for gv in gvs if gv[0]]
            #return gvs
            for gv in gvs:
                if gv[0] is not None:
                    new_gvs.append((tf.clip_by_norm(gv[0], grad_norm_thresh), gv[1]))
                else:
                    print("no gradient for %s" % gv[1].op.name)
                    #raise
                    new_gvs.append(gv)
            return new_gvs
    else:
        #gvs = [(tf.clip_by_norm(gv[0], grad_norm_thresh), gv[1]) \
        #       for gv in gvs if gv[0]]
        #return gvs
        for gv in gvs:
            if gv[0] is not None:
                new_gvs.append((tf.clip_by_norm(gv[0], grad_norm_thresh), gv[1]))
            else:
                print("no gradient for %s" % gv[1].op.name)
                #raise
                new_gvs.append(gv)
        return new_gvs        
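A minimal sketch of how this helper could be wired into a training step; the variable, loss and optimizer below are illustrative and not part of the original file:

import tensorflow as tf

x = tf.Variable([3.0, 4.0])
loss = tf.reduce_sum(tf.square(x))
opt = tf.train.AdamOptimizer(1e-3)
gvs = opt.compute_gradients(loss)                               # list of (grad, var) tuples
train_op = opt.apply_gradients(clip_by_norm(gvs, grad_norm_thresh=5.0))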
Example #2
  def two_linear( self, xin, linear_size, residual, dropout_keep_prob, max_norm, batch_norm, dtype, idx ):
    """
    Make a bi-linear block with optional residual connection

    Args
      xin: the batch that enters the block
      linear_size: integer. The size of the linear units
      residual: boolean. Whether to add a residual connection
      dropout_keep_prob: float [0,1]. Probability of dropping something out
      max_norm: boolean. Whether to clip the weights so their L2 norm is at most 1
      batch_norm: boolean. Whether to do batch normalization
      dtype: type of the weights. Usually tf.float32
      idx: integer. Number of layer (for naming/scoping)
    Returns
      y: the batch after it leaves the block
    """

    with vs.variable_scope( "two_linear_"+str(idx) ) as scope:

      input_size = int(xin.get_shape()[1])

      # Linear 1
      w2 = tf.get_variable( name="w2_"+str(idx), initializer=kaiming, shape=[input_size, linear_size], dtype=dtype)
      b2 = tf.get_variable( name="b2_"+str(idx), initializer=kaiming, shape=[linear_size], dtype=dtype)
      w2 = tf.clip_by_norm(w2,1) if max_norm else w2
      y = tf.matmul(xin, w2) + b2
      if  batch_norm:
        y = tf.layers.batch_normalization(y,training=self.isTraining,name="batch_normalization1"+str(idx))

      y = tf.nn.relu( y )
      y = tf.nn.dropout( y, dropout_keep_prob )

      # Linear 2
      w3 = tf.get_variable( name="w3_"+str(idx), initializer=kaiming, shape=[linear_size, linear_size], dtype=dtype)
      b3 = tf.get_variable( name="b3_"+str(idx), initializer=kaiming, shape=[linear_size], dtype=dtype)
      w3 = tf.clip_by_norm(w3,1) if max_norm else w3
      y = tf.matmul(y, w3) + b3

      if  batch_norm:
        y = tf.layers.batch_normalization(y,training=self.isTraining,name="batch_normalization2"+str(idx))

      y = tf.nn.relu( y )
      y = tf.nn.dropout( y, dropout_keep_prob )

      # Residual every 2 blocks
      y = (xin + y) if residual else y

    return y
Example #3
  def create_variables_for_optimization(self):
    with tf.name_scope("optimization"):
      with tf.name_scope("masker"):
          self.mask = tf.sequence_mask(self.seq_len, self.num_step)
          self.mask = tf.reshape(tf.cast(self.mask, tf.float32), (-1,))
      if self.loss_function == "cross_entropy":
        self.pl_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
                                            logits=self.logit,
                                            labels=self.actions_flatten)
      elif self.loss_function == "l2":
        self.one_hot_actions = tf.one_hot(self.actions_flatten, self.num_actions)
        self.pl_loss = tf.reduce_mean((self.probs - self.one_hot_actions) ** 2,
                                            axis=1)
      else:
          raise ValueError("loss function type is not defined")

      self.pl_loss = tf.multiply(self.pl_loss, self.mask)
      self.pl_loss = tf.reduce_mean(tf.multiply(self.pl_loss, self.returns_flatten))

      self.entropy = tf.multiply(self.entropy, self.mask)
      self.entropy = tf.reduce_mean(self.entropy)

      self.loss = self.pl_loss - self.entropy_bonus * self.entropy

      self.trainable_variables = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope="policy_network")
      self.gradients = self.optimizer.compute_gradients(self.loss, var_list=self.trainable_variables)
      self.clipped_gradients = [(tf.clip_by_norm(grad, self.max_gradient), var)
                                  for grad, var in self.gradients]
      self.train_op = self.optimizer.apply_gradients(self.clipped_gradients,
                                                     self.global_step)
      self.grad_norm = tf.global_norm([grad for grad, var in self.gradients])
      self.var_norm = tf.global_norm(self.trainable_variables)
Example #4
    def _clip_gradients(self, grads_and_vars):
        """Clip gradients.
        Args:
            grads_and_vars (list): list of tuples of `(grads, vars)`
        Returns:
            clipped_grads_and_vars (list): list of tuple of
                `(clipped grads, vars)`
        """
        # TODO: Optionally add gradient noise

        clipped_grads_and_vars = []

        # Clip gradient norm
        for grad, var in grads_and_vars:
            if grad is not None:
                clipped_grads_and_vars.append(
                    (tf.clip_by_norm(grad, clip_norm=self.clip_grad_norm),
                     var))

        # Clip gradient
        # for grad, var in grads_and_vars:
        #     if grad is not None:
        #         clipped_grads_and_vars.append(
        #             (tf.clip_by_value(grad,
        #                               clip_value_min=-self.clip_grad_norm,
        #                               clip_value_max=self.clip_grad_norm),
        #              var))

        # TODO: Add histograms for variables, gradients (norms)
        # self._tensorboard(trainable_vars)

        return clipped_grads_and_vars
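A sketch of how the "Optionally add gradient noise" TODO above might be realized; the helper name and noise scale are made up for illustration, and dense gradients are assumed:

def _add_gradient_noise(grads_and_vars, stddev=1e-3):
    noisy = []
    for grad, var in grads_and_vars:
        if grad is not None:
            # add small Gaussian noise to each dense gradient
            grad = grad + tf.random_normal(tf.shape(grad), stddev=stddev)
        noisy.append((grad, var))
    return noisy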
Example #5
 def create_update_op_backup(self):
     optimizer = tf.train.MomentumOptimizer(self.config.learning_rate, self.config.momentum)
     #self.update_op = optimizer.minimize(self.loss)
     
     g_list = optimizer.compute_gradients(self.loss)
     
     # 000
     g_list_new = [(tf.clip_by_norm(g, 5), v) for g, v in g_list]
     # g_list_new = []
     # for g, v in g_list:
         # g_not_finite = tf.logical_or(tf.is_nan(g), tf.is_inf(g))
         
         # 001
         # g = tf.select(g_not_finite, tf.zeros_like(g), g)
         # g = tf.clip_by_norm(g, 5)
         # g = tf.select(g_not_finite, 0.1*v, g)
         
         # 002
         # g = tf.convert_to_tensor(g)
         # g_norm = tf.sqrt(tf.reduce_sum(tf.square(g)))
         # g = tf.select(g_not_finite, 0.1*v, g*5/g_norm)
         
         # g_list_new.append((g, v))
     
     self.update_op = optimizer.apply_gradients(g_list_new)
     return
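The commented-out variants above rely on tf.select, which later TensorFlow 1.x releases replaced with tf.where. A sketch of the same NaN/Inf guard written with tf.where, assuming the g_list from the snippet above; it is illustrative and not part of the original file:

g_list_new = []
for g, v in g_list:
    if g is None:
        continue
    g_not_finite = tf.logical_or(tf.is_nan(g), tf.is_inf(g))
    g = tf.where(g_not_finite, tf.zeros_like(g), g)   # zero out non-finite entries
    g = tf.clip_by_norm(g, 5)
    g_list_new.append((g, v))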
Example #6
  def __init__(self, sess, pred_network, env, stat, conf, target_network=None):
    super(DeepQ, self).__init__(sess, pred_network, target_network, env, stat, conf)

    # Optimizer
    with tf.variable_scope('optimizer'):
      self.targets = tf.placeholder('float32', [None], name='target_q_t')
      self.actions = tf.placeholder('int64', [None], name='action')

      actions_one_hot = tf.one_hot(self.actions, self.env.action_size, 1.0, 0.0, name='action_one_hot')
      pred_q = tf.reduce_sum(self.pred_network.outputs * actions_one_hot, reduction_indices=1, name='q_acted')

      self.delta = self.targets - pred_q
      if self.max_delta and self.min_delta:
        self.delta = tf.clip_by_value(self.delta, self.min_delta, self.max_delta, name='clipped_delta')

      self.loss = tf.reduce_mean(tf.square(self.delta), name='loss')

      self.learning_rate_op = tf.maximum(self.learning_rate_minimum,
          tf.train.exponential_decay(
              self.learning_rate,
              self.stat.t_op,
              self.learning_rate_decay_step,
              self.learning_rate_decay,
              staircase=True))

      optimizer = tf.train.RMSPropOptimizer(
        self.learning_rate_op, momentum=0.95, epsilon=0.01)
      
      grads_and_vars = optimizer.compute_gradients(self.loss)
      for idx, (grad, var) in enumerate(grads_and_vars):
        if grad is not None:
          grads_and_vars[idx] = (tf.clip_by_norm(grad, self.max_grad_norm), var)
      self.optim = optimizer.apply_gradients(grads_and_vars)
Example #7
File: dpg_ops.py  Project: wmiao1769/trfl
def dpg(q_max, a_max, dqda_clipping=None, clip_norm=False, name="DpgLearning"):
  """Implements the Deterministic Policy Gradient (DPG) loss as a TensorFlow Op.

  This op implements the loss for the `actor`, the `critic` can instead be
  updated by minimizing the `value_ops.td_learning` loss.

  See "Deterministic Policy Gradient Algorithms" by Silver, Lever, Heess,
  Degris, Wierstra, Riedmiller (http://proceedings.mlr.press/v32/silver14.pdf).

  Args:
    q_max: Tensor holding Q-values generated by Q network with the input of
      (state, a_max) pair, shape `[B]`.
    a_max: Tensor holding the optimal action, shape `[B, action_dimension]`.
    dqda_clipping: `int` or `float`, clips the gradient dqda element-wise
      between `[-dqda_clipping, dqda_clipping]`.
    clip_norm: Whether to perform dqda clipping on the vector norm of the last
      dimension, or component wise (default).
    name: name to prefix ops created within this op.

  Returns:
    A namedtuple with fields:

    * `loss`: a tensor containing the batch of losses, shape `[B]`.
    * `extra`: a namedtuple with fields:
        * `q_max`: Tensor holding the optimal Q values, `[B]`.
        * `a_max`: Tensor holding the optimal action, `[B, action_dimension]`.
        * `dqda`: Tensor holding the derivative dq/da, `[B, action_dimension]`.

  Raises:
    ValueError: If `q_max` doesn't depend on `a_max` or if `dqda_clipping <= 0`.
  """

  # DPG op.
  with tf.name_scope(name, values=[q_max, a_max]):

    # Calculate the gradient dq/da.
    dqda = tf.gradients([q_max], [a_max])[0]

    # Check that `q_max` depends on `a_max`.
    if dqda is None:
      raise ValueError("q_max needs to be a function of a_max")

    # Clipping the gradient dq/da.
    if dqda_clipping is not None:
      if dqda_clipping <= 0:
        raise ValueError("dqda_clipping should be bigger than 0, {} found"
                         .format(dqda_clipping))
      if clip_norm:
        dqda = tf.clip_by_norm(dqda, dqda_clipping, axes=-1)
      else:
        dqda = tf.clip_by_value(dqda, -1. * dqda_clipping, dqda_clipping)

    # Target_a ensures correct gradient calculated during backprop.
    target_a = dqda + a_max
    # Stop the gradient going through Q network when backprop.
    target_a = tf.stop_gradient(target_a)
    # Gradient only go through actor network.
    loss = 0.5 * tf.reduce_sum(tf.square(target_a - a_max), axis=-1)
    return base_ops.LossOutput(
        loss, DPGExtra(q_max=q_max, a_max=a_max, dqda=dqda))
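A minimal usage sketch, assuming the trfl package is importable as in the file above; the toy critic below is made up so that q_max depends on a_max and dq/da is defined:

import tensorflow as tf
import trfl

a_max = tf.constant([[0.1, -0.3], [0.4, 0.2]])      # [B=2, action_dimension=2]
q_max = -tf.reduce_sum(tf.square(a_max), axis=-1)   # toy critic output, shape [B]
loss, extra = trfl.dpg(q_max, a_max, dqda_clipping=1.0, clip_norm=True)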
Example #8
    def __init__(self, optimizer, devices, input_placeholders,
                 per_device_batch_size, build_loss, logdir,
                 grad_norm_clipping=None):
        self.optimizer = optimizer
        self.devices = devices
        self.batch_size = per_device_batch_size * len(devices)
        self.per_device_batch_size = per_device_batch_size
        self.input_placeholders = input_placeholders
        self.build_loss = build_loss
        self.logdir = logdir

        # First initialize the shared loss network
        with tf.variable_scope(TOWER_SCOPE_NAME):
            self._shared_loss = build_loss(*input_placeholders)

        # Then setup the per-device loss graphs that use the shared weights
        self._batch_index = tf.placeholder(tf.int32)

        # Split on the CPU in case the data doesn't fit in GPU memory.
        with tf.device("/cpu:0"):
            data_splits = zip(
                *[tf.split(ph, len(devices)) for ph in input_placeholders])

        self._towers = []
        for device, device_placeholders in zip(self.devices, data_splits):
            self._towers.append(self._setup_device(device,
                                                   device_placeholders))

        avg = average_gradients([t.grads for t in self._towers])
        if grad_norm_clipping:
            for i, (grad, var) in enumerate(avg):
                if grad is not None:
                    avg[i] = (tf.clip_by_norm(grad, grad_norm_clipping), var)
        self._train_op = self.optimizer.apply_gradients(avg)
Example #9
File: dqn.py  Project: danfeiX/drl
def train(lr, total_loss, global_step):
    # Variables that affect learning rate.

    # Compute gradients.
    #with tf.control_dependencies([loss_averages_op]):
    opt = tf.train.GradientDescentOptimizer(lr)
    grads = opt.compute_gradients(total_loss)

    # Add histograms for gradients.
    for i, (grad, var) in enumerate(grads):
        if grad is not None:
            tf.histogram_summary(var.op.name + '/gradients', grad)
            grads[i] = (tf.clip_by_norm(grad, 5), var)

    apply_gradient_op = opt.apply_gradients(grads, global_step=global_step)

    # Add histograms for trainable variables.
    for var in tf.trainable_variables():
        tf.histogram_summary(var.op.name, var)

    # Track the moving averages of all trainable variables.
    variable_averages = tf.train.ExponentialMovingAverage(
        MOVING_AVERAGE_DECAY, global_step)
    variables_averages_op = variable_averages.apply(tf.trainable_variables())

    with tf.control_dependencies([apply_gradient_op, variables_averages_op]):
        train_op = tf.no_op(name='train')

    return train_op
Example #10
  def build_model(self, mode, embedding_method):
    self.build_memory()
    # self.skip_model = skip.load_model()
    self.skip_model = None
    self.reg_loss = tf.mul(tf.nn.l2_loss(self.T), self.gamma, name='regularization_loss')
    self.data_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(self.probs, self.target, name='data_loss')
    self.loss = tf.add(self.reg_loss, self.data_loss, name = 'total_loss')
    self.average_loss = tf.reduce_mean(self.loss)
    self.opt = tf.train.GradientDescentOptimizer(self.lr)
    self.correct_prediction = tf.equal(self.target, tf.argmax(self.probs,1))
    self.accuracy = tf.reduce_mean(tf.cast(self.correct_prediction, tf.float32))

    grads_and_vars = self.opt.compute_gradients(self.loss, self.params)
    cliped_grads_and_vars = [(tf.clip_by_norm(gv[0], 40), gv[1]) for gv in grads_and_vars]
    inc_op = self.global_step.assign_add(1)
    with tf.control_dependencies([inc_op]):
      self.apply_grad_op = self.opt.apply_gradients(cliped_grads_and_vars)

    self.saver = tf.train.Saver()

    # At Inference mode
    if mode == 'inference':
        if embedding_method == 'word2vec':
            self.saver.restore(self.sess, './demo/MN_shortcut/model.ckpt')
        elif embedding_method == 'skip':
            print 'Restoring model from ./demo/MN_shortcut/skip_plot_40.ckpt'
            self.saver.restore(self.sess, './demo/MN_shortcut/skip_plot_40.ckpt')
    else:
        tf.initialize_all_variables().run()
Example #11
File: model.py  Project: amharc/jnp3
    def _init_train(self):
        readout = tf.stop_gradient(self.target_network.readout)

        # 0 if terminal, max(prediction) if not
        future_rewards = tf.reduce_max(readout, reduction_indices=[1,]) * (1 - self.terminals)
        tf.histogram_summary("rewards_future", future_rewards)

        wanted = self.rewards + self.settings['discount'] * future_rewards
        tf.histogram_summary("rewards_wanted", wanted)

        current = tf.reduce_sum(
                self.act_network.readout * self.action_mask,
                reduction_indices=[1,],
                name="rewards_current"
            )
        tf.histogram_summary("rewards_current", current)

        loss = tf.square(current - wanted)
        self.error = tf.reduce_sum(loss, name="prediction_error")

        tf.scalar_summary('error', self.error)

        grad_vars = self.settings['optimizer'].compute_gradients(self.error)

        clipped_grad_vars = [(tf.clip_by_norm(grad, 10) if grad is not None else None, var)
                for (grad, var) in grad_vars]

        for grad, var in clipped_grad_vars:
            tf.histogram_summary(var.name, var)
            if grad is not None:
                tf.histogram_summary(var.name + "_clipgrad", grad)

        self.train_op = self.settings['optimizer'].apply_gradients(clipped_grad_vars, global_step=self.global_step)
Example #12
    def build_model(self):
        self.build_memory()

        self.W = tf.Variable(tf.random_normal([self.edim, self.nwords], stddev=self.init_std)) # d*V mapping
        z = tf.matmul(self.hid[-1], self.W) # output of the last hop (o^k + u^k)
        # [batch_size, edim] * [edim, nwords] => [batch_size, nwords]

        self.loss = tf.nn.softmax_cross_entropy_with_logits(logits=z, labels=self.target) # loss
        # target is one-hot encoded with shape [batch_size, nwords]
        
        self.lr = tf.Variable(self.current_lr)
        self.opt = tf.train.GradientDescentOptimizer(self.lr)

        params = [self.A, self.B, self.C, self.T_A, self.T_B, self.W]
        grads_and_vars = self.opt.compute_gradients(self.loss,params)
        clipped_grads_and_vars = [(tf.clip_by_norm(gv[0], self.max_grad_norm), gv[1]) \
                                   for gv in grads_and_vars] # list of (gradient, variable) pairs; each gradient is clipped by norm

        # each gradient is clipped by norm before the update is applied

        inc = self.global_step.assign_add(1) # increment the global step
        with tf.control_dependencies([inc]): # the ops listed in [] are guaranteed to run before the ops below
            self.optim = self.opt.apply_gradients(clipped_grads_and_vars)
            # normally global_step would be passed to the optimizer directly, but since the gradients
            # are clipped in a separate step, this dependency keeps the step counter in sync

        tf.global_variables_initializer().run()
        self.saver = tf.train.Saver()
Example #13
  def make_accumulated_gradients(self):
    reset_accum_grads = []
    new_grads_and_vars = []

    # 1. Prepare accum_grads
    self.accum_grads = {}
    self.add_accum_grads = {}

    for step, network in enumerate(self.networks):
      grads_and_vars = self.global_optim.compute_gradients(network.total_loss, network.w.values())
      _add_accum_grads = []

      for grad, var in tuple(grads_and_vars):
        if grad is not None:
          shape = grad.get_shape().as_list()

          name = 'accum/%s' % "/".join(var.name.split(':')[0].split('/')[-3:])
          if step == 0:
            self.accum_grads[name] = tf.Variable(
                tf.zeros(shape), trainable=False, name=name)

            global_v = global_var[re.sub(r'.*\/A3C_\d+\/', '', var.name)]
            new_grads_and_vars.append((tf.clip_by_norm(self.accum_grads[name].ref(), self.max_grad_norm), global_v))

            reset_accum_grads.append(self.accum_grads[name].assign(tf.zeros(shape)))

          _add_accum_grads.append(tf.assign_add(self.accum_grads[name], grad))

      # 2. Add gradient to accum_grads
      self.add_accum_grads[step] = tf.group(*_add_accum_grads)
Example #14
def make_tf_Linv(layer, V_shape, c_shape, lr, act=tf.nn.tanh):
  """ builds graph for layer-local training of V and c """
  with tf.name_scope('layer'+str(layer)+'_inv') as scope:

    V = tf.get_variable(scope+'V', shape=V_shape, dtype=tf.float32, initializer=tf.orthogonal_initializer(0.95))
    #V = tf.get_variable(scope+'V', shape=V_shape, dtype=tf.float32, initializer=tf.contrib.layers.xavier_initializer(uniform=True, seed=None, dtype=tf.float32))
    c = tf.get_variable(scope+'c', shape=c_shape, dtype=tf.float32, initializer=tf.constant_initializer(0.))
    
    W = tf.placeholder(tf.float32, shape=[V_shape[1], V_shape[0]], name='W')
    b = tf.placeholder(tf.float32, shape=[1, V_shape[0]], name='b')
    x_0 = tf.placeholder(tf.float32, shape=[None, V_shape[1]], name='input')
    
    fx = act(tf.matmul(x_0, W) + b)
    loss = 0.5*tf.reduce_mean((act(tf.matmul(fx, V) + c) - x_0)**2, name='loss')  
    
    s1 = tf.summary.scalar('log_loss'+str(layer), tf.log(loss))
    s2 = tf.summary.histogram('V'+str(layer), V)
    s3 = tf.summary.histogram('c'+str(layer), c) 
    
    opt = tf.train.RMSPropOptimizer(lr)
    gvs = opt.compute_gradients(loss, var_list=[V, c])
    sg  = [tf.summary.scalar('norm_grad'+var.name[-3], tf.nn.l2_loss(grad)) for grad, var in gvs] # var.name = 'namescope/V:0' and we want just 'V'
    clipped_gvs = [(tf.clip_by_norm(grad, 100.), var) for grad, var in gvs]
    
    return opt.apply_gradients(clipped_gvs), tf.summary.merge([s1] + sg)
Example #15
def adv_target_net2(input_images, clip_norm=1.5):
    with tf.variable_scope('adv_encoder') as scope:
        width = 32
        height = 32
        batch_size = 128
        # code_length = 6000

        input_images = input_images/255

        # clip bound box
        mean, var = tf.nn.moments(input_images, axes=tuple(range(1,len(input_images.shape))), keep_dims=True)
        normed_input_images = (input_images-mean)/var

        # Convolutional layer 1
        conv1 = tf.layers.conv2d(inputs=normed_input_images,
                                 filters=32,
                                 kernel_size=(5, 5),
                                 # kernel_initializer=tf.contrib.layers.xavier_initializer(),
                                 activation=tf.nn.leaky_relu,
                                 padding='SAME',
                                 name='adv_conv1')

        # maxpool layer1
        maxpool1 = tf.layers.max_pooling2d(conv1, (3,3), (2,2), 'SAME')
        
        # Convolutional layer 2
        conv2 = tf.layers.conv2d(inputs=maxpool1,
                                 filters=64,
                                 kernel_size=(5, 5),
                                 # kernel_initializer=tf.contrib.layers.xavier_initializer(),
                                 activation=tf.nn.leaky_relu,
                                 padding='SAME',
                                 name='adv_conv2')

        # maxpool layer2
        maxpool2 = tf.layers.max_pooling2d(conv2, (3,3), (2,2), 'SAME')

        deconv1 = tf.layers.conv2d_transpose(maxpool2, 32, (5,5), (2,2), 'SAME',
                                             activation=tf.nn.leaky_relu,
                                             name='adv_deconv1')

        adv_mask = tf.layers.conv2d_transpose(deconv1, 3, (5,5), (2,2), 'SAME',
                                             activation=tf.nn.tanh,
                                             name='adv_deconv2')

        scaled_adv_mask = tf.clip_by_norm(adv_mask, clip_norm, axes=list(range(1,len(adv_mask.shape))))
        adv_images = tf.clip_by_value(scaled_adv_mask+input_images,0,1)
        output_images = tf.reshape(adv_images, (batch_size, height, width, 3)) * 255.0
        

        dif = adv_images - input_images

        tf.summary.image('adv_images', output_images)

        # Reconstruction L2 loss
        mean_square_error = tf.reduce_mean(tf.square(dif), axis=list(range(1,len(dif.shape))))
        loss = tf.reduce_mean(mean_square_error, name='dis_loss')
        
    return loss, output_images
Example #16
    def create_variables(self):
        self.target_q_network    = self.q_network.copy(scope="target_network")

        # FOR REGULAR ACTION SCORE COMPUTATION
        with tf.name_scope("taking_action"):
            self.observation        = self.q_network.input_placeholder("observation")
            self.action_scores      = tf.identity(self.q_network(self.observation), name="action_scores")
            tf.histogram_summary("action_scores", self.action_scores)
            self.predicted_actions  = tf.argmax(self.action_scores, dimension=1, name="predicted_actions")

        with tf.name_scope("estimating_future_rewards"):
            # FOR PREDICTING TARGET FUTURE REWARDS
            self.next_observation          = self.q_network.input_placeholder("next_observation")
            self.next_observation_mask     = tf.placeholder(tf.float32,
                                                            (None,),
                                                            name="next_observation_mask")
            self.next_action_scores        = self.target_q_network(self.next_observation)

            tf.histogram_summary("target_action_scores", self.next_action_scores)
            self.rewards                   = tf.placeholder(tf.float32, (None,), name="rewards")
            target_values                  = \
                    tf.reduce_max(self.next_action_scores, reduction_indices=[1,]) * self.next_observation_mask
            self.future_rewards            = self.rewards + self.discount_rate * target_values

        with tf.name_scope("q_value_precition"):
            # FOR PREDICTION ERROR
            self.action_mask                = tf.placeholder(tf.float32,
                                                              self.q_network.output_shape(),
                                                              name="action_mask")
            self.masked_action_scores       = tf.reduce_sum(self.action_scores * self.action_mask, reduction_indices=[1,])
            temp_diff                       = self.masked_action_scores - self.future_rewards
            self.prediction_error           = tf.reduce_mean(tf.square(temp_diff))
            gradients                       = self.optimizer.compute_gradients(
                                                    self.prediction_error,
                                                    var_list=self.q_network.variables())
            for i, (grad, var) in enumerate(gradients):
                if grad is not None:
                    gradients[i] = (tf.clip_by_norm(grad, 5), var)
            # Add histograms for gradients.
            for grad, var in gradients:
                tf.histogram_summary(var.name, var)
                if grad is not None:
                    tf.histogram_summary(var.name + '/gradients', grad)
            self.train_op                   = self.optimizer.apply_gradients(gradients)

        # UPDATE TARGET NETWORK
        with tf.name_scope("target_network_update"):
            self.target_network_update = []
            for v_source, v_target in zip(self.q_network.variables(), self.target_q_network.variables()):
                # this is equivalent to target = (1-alpha) * target + alpha * source
                update_op = v_target.assign_sub(self.target_network_update_rate * (v_target - v_source))
                self.target_network_update.append(update_op)
            self.target_network_update = tf.group(*self.target_network_update)

        # summaries
        tf.scalar_summary("prediction_error", self.prediction_error)

        self.summarize = tf.merge_all_summaries()
        self.no_op1    = tf.no_op()
Example #17
  def build_model(self, reuse, dev, ntype):
    with tf.variable_scope(self.name), tf.device(dev):
      if reuse:
        tf.get_variable_scope().reuse_variables()
        assert tf.get_variable_scope().reuse

      # Set inputs of networks
      self.minimap = tf.placeholder(tf.float32, [None, U.minimap_channel(), self.msize, self.msize], name='minimap')
      self.screen = tf.placeholder(tf.float32, [None, U.screen_channel(), self.ssize, self.ssize], name='screen')
      self.info = tf.placeholder(tf.float32, [None, self.isize], name='info')

      # Build networks
      net = build_net(self.minimap, self.screen, self.info, self.msize, self.ssize, len(actions.FUNCTIONS), ntype)
      self.spatial_action, self.non_spatial_action, self.value = net

      # Set targets and masks
      self.valid_spatial_action = tf.placeholder(tf.float32, [None], name='valid_spatial_action')
      self.spatial_action_selected = tf.placeholder(tf.float32, [None, self.ssize**2], name='spatial_action_selected')
      self.valid_non_spatial_action = tf.placeholder(tf.float32, [None, len(actions.FUNCTIONS)], name='valid_non_spatial_action')
      self.non_spatial_action_selected = tf.placeholder(tf.float32, [None, len(actions.FUNCTIONS)], name='non_spatial_action_selected')
      self.value_target = tf.placeholder(tf.float32, [None], name='value_target')

      # Compute log probability
      spatial_action_prob = tf.reduce_sum(self.spatial_action * self.spatial_action_selected, axis=1)
      spatial_action_log_prob = tf.log(tf.clip_by_value(spatial_action_prob, 1e-10, 1.))
      non_spatial_action_prob = tf.reduce_sum(self.non_spatial_action * self.non_spatial_action_selected, axis=1)
      valid_non_spatial_action_prob = tf.reduce_sum(self.non_spatial_action * self.valid_non_spatial_action, axis=1)
      valid_non_spatial_action_prob = tf.clip_by_value(valid_non_spatial_action_prob, 1e-10, 1.)
      non_spatial_action_prob = non_spatial_action_prob / valid_non_spatial_action_prob
      non_spatial_action_log_prob = tf.log(tf.clip_by_value(non_spatial_action_prob, 1e-10, 1.))
      self.summary.append(tf.summary.histogram('spatial_action_prob', spatial_action_prob))
      self.summary.append(tf.summary.histogram('non_spatial_action_prob', non_spatial_action_prob))

      # Compute losses, more details in https://arxiv.org/abs/1602.01783
      # Policy loss and value loss
      action_log_prob = self.valid_spatial_action * spatial_action_log_prob + non_spatial_action_log_prob
      advantage = tf.stop_gradient(self.value_target - self.value)
      policy_loss = - tf.reduce_mean(action_log_prob * advantage)
      value_loss = - tf.reduce_mean(self.value * advantage)
      self.summary.append(tf.summary.scalar('policy_loss', policy_loss))
      self.summary.append(tf.summary.scalar('value_loss', value_loss))

      # TODO: policy penalty
      loss = policy_loss + value_loss

      # Build the optimizer
      self.learning_rate = tf.placeholder(tf.float32, None, name='learning_rate')
      opt = tf.train.RMSPropOptimizer(self.learning_rate, decay=0.99, epsilon=1e-10)
      grads = opt.compute_gradients(loss)
      cliped_grad = []
      for grad, var in grads:
        self.summary.append(tf.summary.histogram(var.op.name, var))
        self.summary.append(tf.summary.histogram(var.op.name+'/grad', grad))
        grad = tf.clip_by_norm(grad, 10.0)
        cliped_grad.append([grad, var])
      self.train_op = opt.apply_gradients(cliped_grad)
      self.summary_op = tf.summary.merge(self.summary)

      self.saver = tf.train.Saver(max_to_keep=100)
Example #18
  def testClipByNormClipped(self):
    # Norm clipping when clip_norm < 5
    with self.test_session():
      x = tf.constant([-3.0, 0.0, 0.0, 4.0, 0.0, 0.0], shape=[2, 3])
      # Norm of x = sqrt(3^2 + 4^2) = 5
      np_ans = [[-2.4, 0.0, 0.0],
                [3.2, 0.0, 0.0]]
      clip_norm = 4.0
      ans = tf.clip_by_norm(x, clip_norm)
      tf_ans = ans.eval()
      
      clip_tensor = tf.constant(4.0)
      ans = tf.clip_by_norm(x, clip_tensor)
      tf_ans_tensor = ans.eval()

    self.assertAllClose(np_ans, tf_ans)
    self.assertAllClose(np_ans, tf_ans_tensor)
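A quick NumPy check of the numbers used above: the global norm of x is 5, so clipping to 4 rescales every entry by 4/5 (sketch, assuming NumPy is available):

import numpy as np

x = np.array([[-3.0, 0.0, 0.0], [4.0, 0.0, 0.0]])
norm = np.sqrt((x ** 2).sum())      # 5.0 > clip_norm = 4.0
print(x * (4.0 / norm))             # [[-2.4  0.  0.] [ 3.2  0.  0.]]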
Example #19
 def create_grads(self, loss, exclude, network, global_network):
   vs = list(set(network.var.keys()) - exclude)
   gs = tf.gradients(loss, [network.var[v] for v in vs])
   for i in xrange(len(gs)):
     if self.max_grad_norm > 0.:
       gs[i] = tf.clip_by_norm(gs[i], self.max_grad_norm)
     gs[i] /= self.n_threads
   return zip(gs, map(global_network.var.get, vs))
Example #20
File: clip.py  Project: mthrok/luchador
def clip_by_norm(tensor, clip_norm, axes=None, name=None):
    """Implement clip_by_norm in Tensorflow backend.

    See :func:`luchador.nn.ops.clip_by_norm` for the detail.
    """
    _tensor = tf.clip_by_norm(
        tensor.unwrap(), clip_norm=clip_norm, axes=axes, name=name)
    return Tensor(tensor=_tensor, name=name)
Example #21
def flatgrad(loss, var_list, clip_norm=None):
    grads = tf.gradients(loss, var_list)
    if clip_norm is not None:
        grads = [tf.clip_by_norm(grad, clip_norm=clip_norm) if grad is not None else None
                 for grad in grads]
    return tf.concat(axis=0, values=[
        tf.reshape(grad if grad is not None else tf.zeros_like(v), [numel(v)])
        for (v, grad) in zip(var_list, grads)
    ])
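A minimal sketch of calling flatgrad; the variables and loss are illustrative, and numel is assumed to return a variable's element count (e.g. v.get_shape().num_elements()):

import tensorflow as tf

w = tf.Variable(tf.ones([3, 2]))
b = tf.Variable(tf.zeros([2]))
loss = tf.reduce_sum(tf.square(tf.matmul(tf.ones([1, 3]), w) + b))
flat = flatgrad(loss, [w, b], clip_norm=10.0)   # flat vector of shape [3*2 + 2] = [8]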
Example #22
 def _init_optimizer(self):
     with tf.variable_scope('Optimizer'):
         self.global_step = tf.get_variable('global_step', shape=[], dtype=tf.int32,
                                            initializer=tf.constant_initializer(0), trainable=False)
         self.opt = tf.train.AdadeltaOptimizer(learning_rate=self.lr_ph, epsilon=1e-6)
         grads = self.opt.compute_gradients(self.loss)
         gradients, variables = zip(*grads)
         capped_grads = [tf.clip_by_norm(g, self.grad_clip) for g in gradients]
         self.train_op = self.opt.apply_gradients(zip(capped_grads, variables), global_step=self.global_step)
Example #23
File: tools.py  Project: ulysseses/sr_exp
def clip_by_norm(gvs, grad_norm_thresh, scope="grad_clip"):
    """
    Clip gradients by norm, and scope.

    Args:
      gvs: list of gradient variable tuples
      grad_norm_thresh: norm threshold to clip
      scope: scope for the clip operation
    """
    if scope:
        with tf.name_scope(scope):
            gvs = [(tf.clip_by_norm(gv[0], grad_norm_thresh), gv[1]) \
                   for gv in gvs if gv[0] is not None]
            return gvs
    else:
        gvs = [(tf.clip_by_norm(gv[0], grad_norm_thresh), gv[1]) \
               for gv in gvs if gv[0] is not None]
        return gvs
Example #24
 def compute_gradients(self, loss, var_list=None, gate_gradients=1):
     grads_and_vars = self._optimizer.compute_gradients(
         loss, var_list=var_list, gate_gradients=gate_gradients)
     results = []
     for grad, var in grads_and_vars:
         # grad, var = pair[0], pair[1]
         if grad is not None:
             grad = tf.clip_by_norm(grad, self._clip)
         results.append((grad, var))
     return results
Example #25
def minimize_and_clip(optimizer, objective, var_list, clip_val=10):
    """Minimized `objective` using `optimizer` w.r.t. variables in
    `var_list` while ensure the norm of the gradients for each
    variable is clipped to `clip_val`
    """
    gradients = optimizer.compute_gradients(objective, var_list=var_list)
    for i, (grad, var) in enumerate(gradients):
        if grad is not None:
            gradients[i] = (tf.clip_by_norm(grad, clip_val), var)
    return optimizer.apply_gradients(gradients)
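A minimal usage sketch; the optimizer, loss and variable below are illustrative only:

import tensorflow as tf

x = tf.Variable([3.0, 4.0])
loss = tf.reduce_sum(tf.square(x))
train_op = minimize_and_clip(tf.train.AdamOptimizer(1e-3), loss,
                             var_list=[x], clip_val=10)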
Example #26
    def testClipByNormNotClipped(self):
        # No norm clipping when clip_norm >= 5
        with self.test_session():
            x = tf.constant([-3.0, 0.0, 0.0, 4.0, 0.0, 0.0], shape=[2, 3])
            # Norm of x = sqrt(3^2 + 4^2) = 5
            np_ans = [[-3.0, 0.0, 0.0], [4.0, 0.0, 0.0]]
            clip_norm = 6.0
            ans = tf.clip_by_norm(x, clip_norm)
            tf_ans = ans.eval()

        self.assertAllClose(np_ans, tf_ans)
Example #27
    def testClipByNormZero(self):
        # No norm clipping when norm = 0
        with self.test_session():
            x = tf.constant([0.0, 0.0, 0.0, 0.0, 0.0, 0.0], shape=[2, 3])
            # Norm = 0, no changes
            np_ans = [[0.0, 0.0, 0.0], [0.0, 0.0, 0.0]]
            clip_norm = 6.0
            ans = tf.clip_by_norm(x, clip_norm)
            tf_ans = ans.eval()

        self.assertAllClose(np_ans, tf_ans)
Example #28
    def apply(self, loss):
        trainable = tf.trainable_variables()
        self._grads_and_vars = self._optimizer.compute_gradients(loss, trainable)
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        with tf.control_dependencies(update_ops):
            # Ensures that we execute the update_ops before performing the train_step
            self._optimize_op = self._optimizer.minimize(loss, global_step=self._global_step, colocate_gradients_with_ops=True)

        for _, var in self._grads_and_vars:
            print(var.name)
            var.assign(tf.clip_by_norm(var, 2.0))
Example #29
  def _clip_gradients(self, grad):
    """Clips gradients if the hyperparameter `gradient_clip_norm` requires it.

    Sparse tensors, in the form of IndexedSlices returned for the
    gradients of embeddings, require special handling.

    Args:
      grad: Gradient Tensor, IndexedSlices, or None.

    Returns:
      Optionally clipped gradient.
    """
    if grad is not None and self.hyperparams.gradient_clip_norm > 0:
      if isinstance(grad, tf.IndexedSlices):
        tmp = tf.clip_by_norm(grad.values, self.hyperparams.gradient_clip_norm)
        return tf.IndexedSlices(tmp, grad.indices, grad.dense_shape)
      else:
        return tf.clip_by_norm(grad, self.hyperparams.gradient_clip_norm)
    else:
      return grad
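For context on the sparse branch above: gradients of an embedding lookup arrive as tf.IndexedSlices rather than dense tensors, so only the rows that were actually looked up get clipped. A small sketch with made-up shapes:

import tensorflow as tf

emb = tf.Variable(tf.random_normal([100, 8]))
ids = tf.constant([3, 7, 7])
loss = tf.reduce_sum(tf.nn.embedding_lookup(emb, ids))
grad = tf.gradients(loss, [emb])[0]        # tf.IndexedSlices, not a dense Tensor
clipped = tf.IndexedSlices(tf.clip_by_norm(grad.values, 1.0),
                           grad.indices, grad.dense_shape)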
Example #30
  def testClipByNormClippedWithDim0(self):
    # Norm clipping when clip_norm < 5
    with self.test_session():
      x = tf.constant([-3.0, 0.0, 0.0, 4.0, 0.0, 3.0], shape=[2, 3])
      # Norm of x[:, 0] = sqrt(3^2 + 4^2) = 5, x[:, 2] = 3
      np_ans = [[-2.4, 0.0, 0.0],
                [3.2, 0.0, 3.0]]
      clip_norm = 4.0
      ans = tf.clip_by_norm(x, clip_norm, [0])
      tf_ans = ans.eval()

    self.assertAllClose(np_ans, tf_ans)
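A NumPy check of the per-column behaviour above: with axes=[0] each column is clipped independently, so the first column (norm 5) is rescaled while the last (norm 3) is left untouched (sketch, assuming NumPy is available):

import numpy as np

x = np.array([[-3.0, 0.0, 0.0], [4.0, 0.0, 3.0]])
norms = np.sqrt((x ** 2).sum(axis=0, keepdims=True))     # [[5., 0., 3.]]
scale = np.minimum(1.0, 4.0 / np.maximum(norms, 1e-12))  # rescale only where norm > 4
print(x * scale)    # [[-2.4  0.  0.] [ 3.2  0.  3.]]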
Example #31
Actions4Act_oh = tf.one_hot(Actions4Act, 4)

Act_A = Q(Act_S)
Command_A = tf.argmax(Act_A, axis=-1)

Act_Ap = Q(Act_Sp)
PL = tf.reduce_mean(
    tf.pow((Act_R + tf.reduce_max(Act_A) -
            tf.reduce_max(Act_Ap * Actions4Act_oh)), 2))  #Q

#Opt = tf.train.RMSPropOptimizer(1E-4, momentum=.0, centered=True).minimize(PL)
#Opt = tf.train.MomentumOptimizer(learning_rate=1E-6, momentum=.8).minimize(PL)

optimizer = tf.train.RMSPropOptimizer(1E-4, momentum=.9, centered=False)
gr, va = zip(*optimizer.compute_gradients(PL))
gr = [None if grad is None else tf.clip_by_norm(grad, 5.) for grad in gr]
Opt = optimizer.apply_gradients(zip(gr, va))

sess = tf.Session()
sess.run(tf.global_variables_initializer())

episode = 0
while (1):
    episode += 1
    Rp = 0.
    S = env.reset()  #(210, 160, 3)
    GameScore = 0
    Clives = 3
    Reward_cnt = 0.
    CuReward = 0.
    R_list, S_list = [], []
Example #32
    def construct_graph(self, sess):
        with sess.graph.as_default():

            # Set the random seed for tensorflow
            tf.set_random_seed(cfg.RNG_SEED)

            # Build the main computation graph
            layers = self.net.create_architecture(
                True)  # is_training flag: True

            # Define the loss
            loss = layers['total_loss']

            path_iter = self.pretrained_model.split('.ckpt')[0]
            iter_num = path_iter.split('_')[-1]

            # from iter_ckpt
            if cfg.TRAIN_MODULE_CONTINUE == 1:
                global_step = tf.Variable(int(iter_num), trainable=False)

            # from iter 0
            if cfg.TRAIN_MODULE_CONTINUE == 2:
                global_step = tf.Variable(0, trainable=False)

            #lr             = tf.train.exponential_decay(cfg.TRAIN.LEARNING_RATE * 10, global_step, cfg.TRAIN.STEPSIZE * 5, cfg.TRAIN.GAMMA, staircase=True)
            # here we use cos lr scheme, i.e.
            first_decay_steps = 80000  # 2 epochs
            lr = cosine_decay_restarts(cfg.TRAIN.LEARNING_RATE * 10,
                                       global_step,
                                       first_decay_steps,
                                       t_mul=2.0,
                                       m_mul=1.0,
                                       alpha=0.0)
            self.optimizer = tf.train.MomentumOptimizer(lr, cfg.TRAIN.MOMENTUM)

            # list_var_to_update = []
            # if cfg.TRAIN_MODULE_UPDATE == 1:
            #     list_var_to_update = tf.trainable_variables()
            # if cfg.TRAIN_MODULE_UPDATE == 2:
            #     list_var_to_update = [var for var in tf.trainable_variables() if 'fc_binary' in var.name or 'binary_classification' in var.name]

            # 1--Update_all_parameter, 2--Only_Update_D, 3--Update_H+O+SP, 4--updating except classifiers of S(fc)
            list_var_to_update = []
            if cfg.TRAIN_MODULE_UPDATE == 1:
                list_var_to_update = tf.trainable_variables()
            if cfg.TRAIN_MODULE_UPDATE == 2:
                list_var_to_update = [
                    var for var in tf.trainable_variables()
                    if 'fc_binary' in var.name
                    or 'binary_classification' in var.name
                ]
            if cfg.TRAIN_MODULE_UPDATE == 3:
                list_var_to_update = [
                    var for var in tf.trainable_variables()
                    if 'fc_binary' not in var.name
                    and 'binary_classification' not in var.name
                ]
            if cfg.TRAIN_MODULE_UPDATE == 4:
                list_var_to_update = [
                    var for var in tf.trainable_variables()
                    if 'classification' not in var.name
                ]

            grads_and_vars = self.optimizer.compute_gradients(
                loss, list_var_to_update)
            capped_gvs = [(tf.clip_by_norm(grad, 1.), var)
                          for grad, var in grads_and_vars]

            train_op = self.optimizer.apply_gradients(capped_gvs,
                                                      global_step=global_step)
            self.saver = tf.train.Saver(max_to_keep=cfg.TRAIN.SNAPSHOT_KEPT)
            # Write the train and validation information to tensorboard
            self.writer = tf.summary.FileWriter(self.tbdir, sess.graph)

        return lr, train_op
Example #33
    def __init__(self, img_shape, train_mode=True, model_path=None, 
                 latent_dim=100, noise='uniform',
                 batch_size=64, d_learning_rate=1e-4, g_learning_rate=3e-4, eps=1e-8, 
                 Wloss=False, Bn=True, Adam=True
                 ):
        """
        Wloss: true for using loss introduced in WGAN; default is vanilla GAN loss
        Bn:    true for using batch normalization (also indicates no bias)
        Adam:  true for using Adam optimizer; false for using rmsprop
        """
                       
        self.img_shape = img_shape
        self.train_mode = train_mode
        self.model_path = model_path
        
        self.H = img_shape[0]
        self.W = img_shape[1]
        self.C = img_shape[2]

        self.z_size = latent_dim        
        self.batch_size = batch_size
        
        self.Wloss = Wloss
        self.Bn = Bn
          
        # build model
        self.DO_SHARE = None
        self.x_r = tf.placeholder(tf.float32, shape=[self.batch_size] + list(self.img_shape))
        
        if noise == 'normal':
            z = tf.random_normal((self.batch_size, 1, 1, self.z_size), 0, 1)
        elif noise == 'uniform':
            z = tf.random_uniform((self.batch_size, 1, 1, self.z_size), -1, 1)
        
        self.x_g = self.generator(z)       
        
        if self.Bn:               
            yl_r = self.discriminator(self.x_r)
            self.DO_SHARE = True
            yl_g = self.discriminator(self.x_g)
        else:
            x = tf.concat(0, [self.x_r, self.x_g])
            yl = self.discriminator(x)
            yl_r, yl_g = tf.split(0, 2, yl)        
        
        if Wloss:
            self.d_loss = tf.reduce_mean(yl_r - yl_g, axis=0)
            self.g_loss = tf.reduce_mean(yl_g, axis=0)
        else: # Vanilla GAN loss
            self.d_loss = ganloss(yl_r) + ganloss(yl_g, 0.) # no smooth label for fake data by improved GAN paper
            self.g_loss = ganloss(yl_g)
                    
        t_vars = tf.trainable_variables()
        
        self.d_vars = [var for var in t_vars if 'd_' in var.name]
        self.g_vars = [var for var in t_vars if 'g_' in var.name]
        
        if Adam:
            self.d_optimizer = tf.train.AdamOptimizer(d_learning_rate, beta1=0.5, beta2=0.999)
            d_grads = self.d_optimizer.compute_gradients(self.d_loss, self.d_vars)
            clip_d_grads = [(tf.clip_by_norm(grad, 5), var) for grad, var in d_grads if grad is not None]
            self.d_optimizer = self.d_optimizer.apply_gradients(clip_d_grads)
            
            self.g_optimizer = tf.train.AdamOptimizer(g_learning_rate, beta1=0.5, beta2=0.999)
            g_grads = self.g_optimizer.compute_gradients(self.g_loss, self.g_vars)
            clip_g_grads = [(tf.clip_by_norm(grad, 5), var) for grad, var in g_grads if grad is not None]
            self.g_optimizer = self.g_optimizer.apply_gradients(clip_g_grads)
        else:
            self.d_optimizer = tf.train.RMSPropOptimizer(d_learning_rate, decay=0.99, epsilon=eps)
            d_grads = self.d_optimizer.compute_gradients(self.d_loss, self.d_vars)
            #clip_d_grads = [(tf.clip_by_norm(grad, 5), var) for grad, var in d_grads if grad is not None]
            clip_d_grads = [(grad, var) for grad, var in d_grads if grad is not None]
            self.d_optimizer = self.d_optimizer.apply_gradients(clip_d_grads)
            
            self.g_optimizer = tf.train.RMSPropOptimizer(g_learning_rate, decay=0.99, epsilon=eps)
            g_grads = self.g_optimizer.compute_gradients(self.g_loss, self.g_vars)
            #clip_g_grads = [(tf.clip_by_norm(grad, 5), var) for grad, var in g_grads if grad is not None]
            clip_g_grads = [(grad, var) for grad, var in g_grads if grad is not None]
            self.g_optimizer = self.g_optimizer.apply_gradients(clip_g_grads)   
            
        self.d_clip = [tf.assign(var, tf.clip_by_value(var, -0.01, 0.01)) for var in self.d_vars]
Example #34
def corrections_func(mainPN,
                     batch_size,
                     trace_length,
                     corrections=False,
                     cube=None,
                     clip_lola_update_norm=False,
                     lola_correction_multiplier=1.0,
                     clip_lola_correction_norm=False,
                     clip_lola_actor_norm=False,
                     against_destabilizer_exploiter=False):
    """Computes corrections for policy gradients.

    Args:
    -----
        mainPN: list of policy/Q-networks
        batch_size: int
        trace_length: int
        corrections: bool (default: False)
            Whether policy networks should use corrections.
        cube: tf.Variable or None (default: None)
            If provided, should be constructed via `lola.utils.make_cube`.
            Used for variance reduction of the value estimation.
            When provided, the computation graph for corrections is faster to
            compile but is quite memory inefficient.
            When None, the variance reduction graph is constructed dynamically,
            takes a little longer to compile, but has a lower memory footprint.
    """
    # not mem_efficient
    if cube is not None:
        ac_logp0 = tf.reshape(mainPN[0].log_pi_action_bs_t,
                              [batch_size, 1, trace_length])
        ac_logp1 = tf.reshape(mainPN[1].log_pi_action_bs_t,
                              [batch_size, trace_length, 1])
        mat_1 = tf.reshape(tf.squeeze(tf.matmul(ac_logp1, ac_logp0)),
                           [batch_size, 1, trace_length * trace_length])

        v_0 = tf.matmul(
            tf.reshape(mainPN[0].sample_reward, [batch_size, trace_length, 1]),
            mat_1)
        v_0 = tf.reshape(
            v_0, [batch_size, trace_length, trace_length, trace_length])

        v_1 = tf.matmul(
            tf.reshape(mainPN[1].sample_reward, [batch_size, trace_length, 1]),
            mat_1)
        v_1 = tf.reshape(
            v_1, [batch_size, trace_length, trace_length, trace_length])

        v_0 = 2 * tf.reduce_sum(v_0 * cube) / batch_size
        v_1 = 2 * tf.reduce_sum(v_1 * cube) / batch_size
    # mem_efficient
    else:
        ac_logp0 = tf.reshape(mainPN[0].log_pi_action_bs_t,
                              [batch_size, trace_length])
        ac_logp1 = tf.reshape(mainPN[1].log_pi_action_bs_t,
                              [batch_size, trace_length])

        # Static exclusive cumsum
        ac_logp0_cumsum = [tf.constant(0.)]
        ac_logp1_cumsum = [tf.constant(0.)]
        for i in range(trace_length - 1):
            ac_logp0_cumsum.append(tf.add(ac_logp0_cumsum[-1], ac_logp0[:, i]))
            ac_logp1_cumsum.append(tf.add(ac_logp1_cumsum[-1], ac_logp1[:, i]))

        # Compute v_0 and v_1
        mat_cumsum = ac_logp0[:, 0] * ac_logp1[:, 0]
        v_0 = mat_cumsum * mainPN[0].sample_reward[:, 0]
        v_1 = mat_cumsum * mainPN[1].sample_reward[:, 0]
        for i in range(1, trace_length):
            mat_cumsum = tf.add(mat_cumsum, ac_logp0[:, i] * ac_logp1[:, i])
            mat_cumsum = tf.add(mat_cumsum,
                                ac_logp0_cumsum[i] * ac_logp1[:, i])
            mat_cumsum = tf.add(mat_cumsum,
                                ac_logp1_cumsum[i] * ac_logp0[:, i])
            v_0 = tf.add(v_0, mat_cumsum * mainPN[0].sample_reward[:, i])
            v_1 = tf.add(v_1, mat_cumsum * mainPN[1].sample_reward[:, i])
        v_0 = 2 * tf.reduce_sum(v_0) / batch_size

        if against_destabilizer_exploiter:
            v_1 = 2 * v_1 / batch_size
        else:
            v_1 = 2 * tf.reduce_sum(v_1) / batch_size

    mainPN[0].v_0_log = v_0
    mainPN[1].v_1_log = v_1
    actor_target_error_0 = (mainPN[0].target -
                            tf.stop_gradient(mainPN[0].value))
    v_0_pi_0 = 2*tf.reduce_sum((actor_target_error_0* mainPN[0].gamma_array) * mainPN[0].log_pi_action_bs_t) / \
               batch_size
    v_0_pi_1 = 2*tf.reduce_sum((actor_target_error_0 * mainPN[1].gamma_array) * mainPN[1].log_pi_action_bs_t) / \
               batch_size

    actor_target_error_1 = (mainPN[1].target -
                            tf.stop_gradient(mainPN[1].value))

    v_1_pi_0 = 2 * tf.reduce_sum(
        (actor_target_error_1 * mainPN[0].gamma_array) *
        mainPN[0].log_pi_action_bs_t) / batch_size
    v_1_pi_1 = 2 * tf.reduce_sum(
        (actor_target_error_1 * mainPN[1].gamma_array) *
        mainPN[1].log_pi_action_bs_t) / batch_size

    mainPN[0].actor_target_error = actor_target_error_0
    mainPN[1].actor_target_error = actor_target_error_1
    mainPN[0].actor_loss = v_0_pi_0
    mainPN[1].actor_loss = v_1_pi_1
    mainPN[0].value_used_for_correction = v_0
    mainPN[1].value_used_for_correction = v_1

    v_0_grad_theta_0 = flatgrad(v_0_pi_0, mainPN[0].parameters)
    v_0_grad_theta_1 = flatgrad(v_0_pi_1, mainPN[1].parameters)

    v_1_grad_theta_0 = flatgrad(v_1_pi_0, mainPN[0].parameters)
    v_1_grad_theta_1 = flatgrad(v_1_pi_1, mainPN[1].parameters)

    mainPN[0].grad = v_0_grad_theta_0
    mainPN[1].grad = v_1_grad_theta_1
    mainPN[0].grad_sum = tf.math.reduce_sum(v_0_grad_theta_0)
    mainPN[1].grad_sum = tf.math.reduce_sum(v_1_grad_theta_1)

    mainPN[0].grad_v_1 = v_1_grad_theta_0
    mainPN[1].grad_v_0 = v_0_grad_theta_1

    if corrections:
        v_0_grad_theta_0_wrong = flatgrad(v_0, mainPN[0].parameters)
        if against_destabilizer_exploiter:
            # v_1_grad_theta_1_wrong_splits = [ flatgrad(v_1[i], mainPN[1].parameters) for i in range(batch_size)]
            # v_1_grad_theta_1_wrong = tf.stack(v_1_grad_theta_1_wrong_splits, axis=1)

            v_1_grad_theta_1_wrong = tf.vectorized_map(
                partial(flatgrad, var_list=mainPN[1].parameters), v_1)
        else:
            v_1_grad_theta_1_wrong = flatgrad(v_1, mainPN[1].parameters)

        param_len = v_0_grad_theta_0_wrong.get_shape()[0].value
        # param_len = -1

        if against_destabilizer_exploiter:
            multiply0 = tf.matmul(
                tf.reshape(tf.stop_gradient(v_0_grad_theta_1), [1, param_len]),
                tf.reshape(v_1_grad_theta_1_wrong, [param_len, batch_size]))
        else:
            multiply0 = tf.matmul(
                tf.reshape(tf.stop_gradient(v_0_grad_theta_1), [1, param_len]),
                tf.reshape(v_1_grad_theta_1_wrong, [param_len, 1]))
        multiply1 = tf.matmul(
            tf.reshape(tf.stop_gradient(v_1_grad_theta_0), [1, param_len]),
            tf.reshape(v_0_grad_theta_0_wrong, [param_len, 1]))

        if against_destabilizer_exploiter:
            second_order0 = flatgrad(multiply0, mainPN[0].parameters)
            second_order0 = second_order0[:, None]

            # second_order0_splits = [flatgrad(multiply0[:, i], mainPN[0].parameters) for i in range(batch_size)]
            # second_order0 = tf.stack(second_order0_splits, axis=1)

            # second_order0 = tf.vectorized_map(partial(flatgrad, var_list=mainPN[0].parameters), multiply0[0, :])
            # second_order0 = tf.reshape(second_order0, [param_len, batch_size])
        else:
            second_order0 = flatgrad(multiply0, mainPN[0].parameters)
        second_order1 = flatgrad(multiply1, mainPN[1].parameters)

        mainPN[0].multiply0 = multiply0
        mainPN[0].v_0_grad_01 = second_order0
        mainPN[1].v_1_grad_10 = second_order1
        mainPN[0].second_order = tf.math.reduce_sum(second_order0)
        mainPN[1].second_order = tf.math.reduce_sum(second_order1)

        if against_destabilizer_exploiter:
            second_order0 = tf.math.reduce_sum(second_order0, axis=1)

        second_order0 = (second_order0 * lola_correction_multiplier)
        second_order1 = (second_order1 * lola_correction_multiplier)
        if clip_lola_correction_norm:
            second_order0 = tf.clip_by_norm(second_order0,
                                            clip_lola_correction_norm,
                                            axes=None,
                                            name=None)
            second_order1 = tf.clip_by_norm(second_order1,
                                            clip_lola_correction_norm,
                                            axes=None,
                                            name=None)
        if clip_lola_actor_norm:
            v_0_grad_theta_0 = tf.clip_by_norm(v_0_grad_theta_0,
                                               clip_lola_actor_norm,
                                               axes=None,
                                               name=None)
            v_1_grad_theta_1 = tf.clip_by_norm(v_1_grad_theta_1,
                                               clip_lola_actor_norm,
                                               axes=None,
                                               name=None)

        delta_0 = v_0_grad_theta_0 + second_order0
        delta_1 = v_1_grad_theta_1 + second_order1

        if clip_lola_update_norm:
            delta_0 = tf.clip_by_norm(delta_0,
                                      clip_lola_update_norm,
                                      axes=None,
                                      name=None)
            delta_1 = tf.clip_by_norm(delta_1,
                                      clip_lola_update_norm,
                                      axes=None,
                                      name=None)

        mainPN[0].delta = delta_0
        mainPN[1].delta = delta_1
    else:
        mainPN[0].delta = v_0_grad_theta_0
        mainPN[1].delta = v_1_grad_theta_1

        # To prevent some logic about logging stuff
        mainPN[0].v_0_grad_01 = tf.reduce_sum(v_0_grad_theta_0) * 0.0
        mainPN[1].v_1_grad_10 = tf.reduce_sum(v_0_grad_theta_0) * 0.0
Example #35
def build_graph(sess, dictionary, NUM_CLASSES, vocabulary_size, embedding_size,
                input_tensor_shape_arr, output_tensor_shape_arr,
                bucketwise_max_level_arr):

    #TODO experiment with minval and maxval
    W = tf.Variable(tf.random_uniform([embedding_size, embedding_size],
                                      minval=-1.0,
                                      maxval=1.0),
                    name='rnn_w_general',
                    dtype=tf.float32)

    V_in = tf.Variable(tf.random_uniform([vocabulary_size, embedding_size],
                                         minval=-1.0,
                                         maxval=1.0),
                       name='embedding_matrix_in',
                       dtype=tf.float32)
    V_out = tf.Variable(tf.random_uniform([NUM_CLASSES, embedding_size],
                                          minval=-1.0,
                                          maxval=1.0),
                        name='embedding_matrix_out',
                        dtype=tf.float32)

    b = tf.Variable(np.zeros((embedding_size)),
                    name='bias_general',
                    dtype=tf.float32)

    W_out = tf.Variable(tf.random_uniform([embedding_size, NUM_CLASSES],
                                          minval=-1.0,
                                          maxval=1.0),
                        name='w_out',
                        dtype=tf.float32)
    b_out = tf.Variable(np.zeros((NUM_CLASSES)),
                        name='bias_out',
                        dtype=tf.float32)

    # input_shape_tensor = tf.placeholder(tf.int32,shape=[1])

    recursion_out_arr = []
    loss_arr = []
    train_step_op_arr = []
    input_tensor_arr = []
    output_tensor_arr = []

    print('Preparing graph...')
    for index, level_arr in tqdm(enumerate(bucketwise_max_level_arr)):
        if index > config.MAX_NUM_BUCKETS_TO_TRAIN:
            continue
        input_tensor_shape = input_tensor_shape_arr[index]
        output_tensor_shape = output_tensor_shape_arr[index]

        input_tensor = tf.placeholder(tf.int32, shape=input_tensor_shape)

        output_tensor = tf.placeholder(tf.int32, shape=output_tensor_shape)

        # print('level_arr')
        # print(level_arr)
        # print('input_tensor_shape')
        # print(input_tensor_shape)

        # input_tensor = tf.placeholder(tf.int32,shape=(9,None))
        # print("--------------------------------")
        # level_arr = [3,3]
        recursion_out, _ = runRecursiveGraph2(input_tensor, W, V_in, V_out, b,
                                              0, level_arr, 0)
        # exit(0)
        loss, logits = getLoss({
            'weights': W_out,
            'biases': b_out,
            'num_classes': NUM_CLASSES,
            'output_word': output_tensor,
            'network_output': recursion_out,
        })

        optimizer = tf.train.AdamOptimizer(1e-3)
        gradients, variables = zip(*optimizer.compute_gradients(loss))
        gradients = [
            None if gradient is None else tf.clip_by_norm(gradient, 5.0)
            for gradient in gradients
        ]
        train_step_op = optimizer.apply_gradients(zip(gradients, variables))

        #TODO: check if some variable has already been initialised if selective initialisation
        print("*************Initialising variables*****************")
        for v in tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES):
            print("Initialising " + v.op.name)
            sess.run(v.initializer)
        print("Uninitialised varaiables")
        print(tf.report_uninitialized_variables())

        recursion_out_arr.append(recursion_out)
        loss_arr.append(loss)
        train_step_op_arr.append(train_step_op)
        input_tensor_arr.append(input_tensor)
        output_tensor_arr.append(output_tensor)

    return (recursion_out_arr, loss_arr, train_step_op_arr,
            input_tensor_arr, output_tensor_arr)
Example #36
0
        attn_dnn1 = attn_dnn0
        
    with tf.name_scope("DNN_decode"):
        _pred , logits , _attn_map = dynamic_deconv(attn_dnn1,_embd,_embd_T,t_place)
    
    with tf.name_scope("Loss"):
        ce = tf.nn.softmax_cross_entropy_with_logits_v2(labels=ys_one_hot,logits=logits)*ys_mask
        ce = tf.reduce_sum(ce,axis=1,keepdims=False)/tf.cast(_ys_length,tf.float32)
        _loss = tf.reduce_mean(ce)
        
    with tf.name_scope("Train"):
        g_step = tf.Variable(0,dtype=tf.int32,trainable=False,name="Global_step")
        lr = tf.train.exponential_decay(3e-4,g_step,2000,0.95,staircase=True)
        opt = tf.train.AdamOptimizer(lr)
        allgrads = opt.compute_gradients(_loss) 
        clip_grads = [(tf.clip_by_norm(grad, 3), var) for grad, var in allgrads]
        _update = opt.apply_gradients(clip_grads,global_step=g_step)
#         _update = opt.minimize(_loss,global_step=g_step)
        _global_step_assign = tf.placeholder(tf.int32)
        assign_g_step = g_step.assign_add(_global_step_assign)
#     with tf.name_scope("Gradient"):
#         _t_position = tf.placeholder(tf.int32)
#         _p_position = _pred[0,_t_position]
#         xs_vector_gradnorm = get_grad_norm(_t_position,_p_position,x_vector)
#         dnn1_gradnorm = get_grad_norm(_t_position,_p_position,attn_dnn0)
#         dnn2_gradnorm = get_grad_norm(_t_position,_p_position,attn_dnn1)
    
    all_var = tf.trainable_variables()
    _init = tf.global_variables_initializer()
    saver = tf.train.Saver(max_to_keep=8,var_list=tf.trainable_variables())
#     tf.summary.FileWriter(log_path,graph=g)
    def __init__(self,
                 model_name=None,
                 session=None,
                 learning_rate=None,
                 optimizer=None,
                 learning_decay_rate=None,
                 filter_sizes=None,
                 num_filters=None,
                 max_sentence_length=None,
                 num_classes=None,
                 embeddings=None,
                 new_embeddings=None,
                 embedding_dim=None,
                 vocabulary_size=None,
                 static=None,
                 max_l2_norm=None,
                 regularization_lambda=None,
                 dropout_keep_prob=None):

        if model_name is None:
            return

        self.model_name = model_name
        self.session = session
        self.learning_rate = learning_rate
        self.optimizer = optimizer
        self.dropout_keep_prob_train = dropout_keep_prob
        self.regularization_lambda = regularization_lambda

        ###############
        #
        #	model definition

        self.input_x = tf.placeholder(shape=(None, max_sentence_length),
                                      dtype=tf.int32,
                                      name="input_x")
        self.input_y = tf.placeholder(shape=(None, num_classes),
                                      dtype=tf.float32,
                                      name="input_y")
        self.dropout_keep_prob = tf.placeholder(dtype=tf.float32,
                                                name="dropout_keep_prob")

        # ===== EMBEDDING LAYER
        self.embeddings_placeholder = tf.placeholder(tf.float32,
                                                     shape=(vocabulary_size,
                                                            embedding_dim))

        self.embeddings = tf.Variable(self.embeddings_placeholder,
                                      trainable=not static)
        self.new_embeddings = tf.Variable(new_embeddings, trainable=True)

        self.all_embeddings = tf.concat([self.embeddings, self.new_embeddings],
                                        axis=0)

        self.embedded_words = tf.nn.embedding_lookup(self.all_embeddings,
                                                     self.input_x)

        # ===== CONVOLUTIONAL LAYER
        self.input_x_expanded = tf.expand_dims(self.embedded_words, -1)

        self.pool_results = []
        for i, filter_size in enumerate(filter_sizes):

            filter = tf.get_variable(
                "filter" + str(i),
                shape=(filter_size, embedding_dim, 1, num_filters),
                dtype=tf.float32,
                initializer=tf.contrib.layers.xavier_initializer())
            if max_l2_norm != 0:
                filter = tf.clip_by_norm(filter, max_l2_norm)
            bias = tf.Variable(tf.constant(0.0, shape=(num_filters, )))

            conv = tf.nn.conv2d(
                input=self.input_x_expanded,  # [batch, in_height, in_width, in_channels]
                filter=filter,  # [filter_height, filter_width, in_channels, out_channels]
                strides=[1, 1, 1, 1],
                padding="VALID")

            relu = tf.nn.relu(tf.nn.bias_add(conv, bias))

            conv_dim = max_sentence_length - filter_size + 1

            pooled = tf.nn.max_pool(relu,
                                    ksize=[1, conv_dim, 1, 1],
                                    strides=[1, 1, 1, 1],
                                    padding='VALID')
            self.pool_results.append(pooled)

        # FLATTENING LAYER

        num_filters_total = num_filters * len(filter_sizes)
        self.flat = tf.reshape(tf.concat(self.pool_results, 3),
                               [-1, num_filters_total])

        # DROPOUT LAYER

        self.dropout = tf.nn.dropout(self.flat, self.dropout_keep_prob)

        # FULLY CONNECTED LAYER

        W = tf.get_variable("W",
                            shape=(num_filters_total, num_classes),
                            dtype=tf.float32,
                            initializer=tf.contrib.layers.xavier_initializer())
        b = tf.Variable(tf.constant(0.1, shape=(num_classes, )))

        self.output = tf.nn.xw_plus_b(self.dropout, W, b, name="output")
        self.predictions = tf.argmax(self.output, 1, name="predictions")

        losses = tf.nn.softmax_cross_entropy_with_logits(labels=self.input_y,
                                                         logits=self.output)
        if regularization_lambda != 0:
            l2_loss = tf.nn.l2_loss(W)
            self.loss = tf.reduce_mean(tf.add(
                losses, tf.multiply(self.regularization_lambda, l2_loss)),
                                       name="loss")
        else:
            self.loss = tf.reduce_mean(losses, name="loss")

        #
        #
        ###############

        # optimization method
        self.optimizer = optimizer(learning_rate=self.learning_rate)

        # training operation
        self.train_op = self.optimizer.minimize(self.loss)

        # saver
        self.saver = tf.train.Saver()

        # initialize variables
        self.session.run(tf.global_variables_initializer(),
                         feed_dict={self.embeddings_placeholder: embeddings})
Example #38
0
def build_train(make_obs_ph,
                q_func,
                num_actions,
                optimizer,
                grad_norm_clipping=None,
                gamma=1.0,
                double_q=True,
                scope="deepq",
                reuse=None,
                param_noise=False,
                param_noise_filter_func=None):
    """Creates the train function:

    Parameters
    ----------
    make_obs_ph: str -> tf.placeholder or TfInput
        a function that takes a name and creates a placeholder of input with that name
    q_func: (tf.Variable, int, str, bool) -> tf.Variable
        the model that takes the following inputs:
            observation_in: object
                the output of observation placeholder
            num_actions: int
                number of actions
            scope: str
            reuse: bool
                should be passed to outer variable scope
        and returns a tensor of shape (batch_size, num_actions) with values of every action.
    num_actions: int
        number of actions
    optimizer: tf.train.Optimizer
        optimizer to use for the Q-learning objective.
    grad_norm_clipping: float or None
        clip gradient norms to this value. If None no clipping is performed.
    gamma: float
        discount rate.
    double_q: bool
        if true will use Double Q Learning (https://arxiv.org/abs/1509.06461).
        In general it is a good idea to keep it enabled.
    scope: str or VariableScope
        optional scope for variable_scope.
    reuse: bool or None
        whether or not the variables should be reused. To be able to reuse the scope must be given.
    param_noise: bool
        whether or not to use parameter space noise (https://arxiv.org/abs/1706.01905)
    param_noise_filter_func: tf.Variable -> bool
        function that decides whether or not a variable should be perturbed. Only applicable
        if param_noise is True. If set to None, default_param_noise_filter is used by default.

    Returns
    -------
    act: (tf.Variable, bool, float) -> tf.Variable
        function to select an action given an observation.
        See the top of the file for details.
    train: (object, np.array, np.array, object, np.array, np.array) -> np.array
        optimize the error in Bellman's equation.
        See the top of the file for details.
    update_target: () -> ()
        copy the parameters from the optimized Q function to the target Q function.
        See the top of the file for details.
    debug: {str: function}
        a bunch of functions to print debug data like q_values.
    """
    if param_noise:
        act_f = build_act_with_param_noise(
            make_obs_ph,
            q_func,
            num_actions,
            scope=scope,
            reuse=reuse,
            param_noise_filter_func=param_noise_filter_func)
    else:
        act_f = build_act(make_obs_ph,
                          q_func,
                          num_actions,
                          scope=scope,
                          reuse=reuse)

    with tf.variable_scope(scope, reuse=reuse):
        # set up placeholders
        obs_t_input = make_obs_ph("obs_t")
        act_t_ph = tf.placeholder(tf.int32, [None], name="action")
        rew_t_ph = tf.placeholder(tf.float32, [None], name="reward")
        obs_tp1_input = make_obs_ph("obs_tp1")
        done_mask_ph = tf.placeholder(tf.float32, [None], name="done")
        importance_weights_ph = tf.placeholder(tf.float32, [None],
                                               name="weight")

        # q network evaluation
        q_t = q_func(obs_t_input.get(),
                     num_actions,
                     scope="q_func",
                     reuse=True)  # reuse parameters from act
        q_func_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES,
                                        scope=tf.get_variable_scope().name +
                                        "/q_func")

        # target q network evaluation
        q_tp1 = q_func(obs_tp1_input.get(), num_actions, scope="target_q_func")
        target_q_func_vars = tf.get_collection(
            tf.GraphKeys.GLOBAL_VARIABLES,
            scope=tf.get_variable_scope().name + "/target_q_func")

        # q scores for actions which we know were selected in the given state.
        q_t_selected = tf.reduce_sum(q_t * tf.one_hot(act_t_ph, num_actions),
                                     1)

        # compute estimate of best possible value starting from state at t + 1
        if double_q:
            q_tp1_using_online_net = q_func(obs_tp1_input.get(),
                                            num_actions,
                                            scope="q_func",
                                            reuse=True)
            q_tp1_best_using_online_net = tf.argmax(q_tp1_using_online_net, 1)
            q_tp1_best = tf.reduce_sum(
                q_tp1 * tf.one_hot(q_tp1_best_using_online_net, num_actions),
                1)
        else:
            q_tp1_best = tf.reduce_max(q_tp1, 1)
        q_tp1_best_masked = (1.0 - done_mask_ph) * q_tp1_best

        # compute RHS of bellman equation
        q_t_selected_target = rew_t_ph + gamma * q_tp1_best_masked

        # compute the error (potentially clipped)
        td_error = q_t_selected - tf.stop_gradient(q_t_selected_target)
        errors = U.huber_loss(td_error)
        weighted_error = tf.reduce_mean(importance_weights_ph * errors)

        # compute optimization op (potentially with gradient clipping)
        if grad_norm_clipping is not None:
            gradients = optimizer.compute_gradients(weighted_error,
                                                    var_list=q_func_vars)
            for i, (grad, var) in enumerate(gradients):
                if grad is not None:
                    gradients[i] = (tf.clip_by_norm(grad,
                                                    grad_norm_clipping), var)
            optimize_expr = optimizer.apply_gradients(gradients)
        else:
            optimize_expr = optimizer.minimize(weighted_error,
                                               var_list=q_func_vars)

        # update_target_fn will be called periodically to copy Q network to target Q network
        update_target_expr = []
        for var, var_target in zip(
                sorted(q_func_vars, key=lambda v: v.name),
                sorted(target_q_func_vars, key=lambda v: v.name)):
            update_target_expr.append(var_target.assign(var))
        update_target_expr = tf.group(*update_target_expr)

        # Create callable functions
        train = U.function(inputs=[
            obs_t_input, act_t_ph, rew_t_ph, obs_tp1_input, done_mask_ph,
            importance_weights_ph
        ],
                           outputs=td_error,
                           updates=[optimize_expr])
        update_target = U.function([], [], updates=[update_target_expr])

        q_values = U.function([obs_t_input], q_t)

        return act_f, train, update_target, {'q_values': q_values}
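
For reference, here is a minimal usage sketch of `build_train` (not part of the original example). It assumes OpenAI baselines is available, `env` is a Gym environment, and `my_q_func` is a user-supplied model with the `(observation_in, num_actions, scope, reuse)` signature described in the docstring; the observation-placeholder helper may differ between baselines versions.

import tensorflow as tf
from baselines.deepq.utils import ObservationInput  # assumed helper; the name varies across versions

act_f, train, update_target, debug = build_train(
    make_obs_ph=lambda name: ObservationInput(env.observation_space, name=name),
    q_func=my_q_func,                  # hypothetical user-defined Q-network builder
    num_actions=env.action_space.n,
    optimizer=tf.train.AdamOptimizer(learning_rate=5e-4),
    grad_norm_clipping=10,             # each gradient is clipped with tf.clip_by_norm
    gamma=0.99,
    double_q=True,
)
update_target()  # copy the online Q weights into the target network once at the start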
Example #39
0
def _grad_clip_by_norm_grad(op, grad):
    # Custom gradient for a clip-by-norm op: clip the incoming gradient with the
    # same norm, and pass no gradient through to the norm input.
    _, norm = op.inputs
    return (tf.clip_by_norm(grad, norm), None)
Example #40
0
    def build_model(self, reuse, dev, ntype):
        with tf.variable_scope(self.name), tf.device(dev):
            if reuse:
                tf.get_variable_scope().reuse_variables()
                assert tf.get_variable_scope().reuse

            # Set inputs of networks
            self.minimap = tf.placeholder(
                tf.float32,
                [None, U.minimap_channel(), self.msize, self.msize],
                name='minimap')
            self.screen = tf.placeholder(
                tf.float32,
                [None, U.screen_channel(), self.ssize, self.ssize],
                name='screen')
            self.info = tf.placeholder(tf.float32, [None, self.isize],
                                       name='info')

            # Build networks
            net = build_net(self.minimap, self.screen, self.info, self.msize,
                            self.ssize, len(actions.FUNCTIONS), ntype)
            self.spatial_action, self.non_spatial_action, self.value = net

            # Set targets and masks
            self.valid_spatial_action = tf.placeholder(
                tf.float32, [None], name='valid_spatial_action')
            self.spatial_action_selected = tf.placeholder(
                tf.float32, [None, self.ssize**2],
                name='spatial_action_selected')
            self.valid_non_spatial_action = tf.placeholder(
                tf.float32, [None, len(actions.FUNCTIONS)],
                name='valid_non_spatial_action')
            self.non_spatial_action_selected = tf.placeholder(
                tf.float32, [None, len(actions.FUNCTIONS)],
                name='non_spatial_action_selected')
            self.value_target = tf.placeholder(tf.float32, [None],
                                               name='value_target')

            # Compute log probability
            spatial_action_prob = tf.reduce_sum(self.spatial_action *
                                                self.spatial_action_selected,
                                                axis=1)
            spatial_action_log_prob = tf.log(
                tf.clip_by_value(spatial_action_prob, 1e-10, 1.))
            non_spatial_action_prob = tf.reduce_sum(
                self.non_spatial_action * self.non_spatial_action_selected,
                axis=1)
            valid_non_spatial_action_prob = tf.reduce_sum(
                self.non_spatial_action * self.valid_non_spatial_action,
                axis=1)
            valid_non_spatial_action_prob = tf.clip_by_value(
                valid_non_spatial_action_prob, 1e-10, 1.)
            non_spatial_action_prob = non_spatial_action_prob / valid_non_spatial_action_prob
            non_spatial_action_log_prob = tf.log(
                tf.clip_by_value(non_spatial_action_prob, 1e-10, 1.))
            self.summary.append(
                tf.summary.histogram('spatial_action_prob',
                                     spatial_action_prob))
            self.summary.append(
                tf.summary.histogram('non_spatial_action_prob',
                                     non_spatial_action_prob))

            # Compute losses, more details in https://arxiv.org/abs/1602.01783
            # Policy loss and value loss
            action_log_prob = self.valid_spatial_action * spatial_action_log_prob + non_spatial_action_log_prob
            advantage = tf.stop_gradient(self.value_target - self.value)
            policy_loss = -tf.reduce_mean(action_log_prob * advantage)
            value_loss = -tf.reduce_mean(self.value * advantage)
            print('net:' + str(advantage))

            self.summary.append(tf.summary.scalar('policy_loss', policy_loss))
            self.summary.append(tf.summary.scalar('value_loss', value_loss))

            # TODO: policy penalty
            loss = policy_loss + value_loss

            # Build the optimizer
            self.learning_rate = tf.placeholder(tf.float32,
                                                None,
                                                name='learning_rate')
            opt = tf.train.RMSPropOptimizer(self.learning_rate,
                                            decay=0.99,
                                            epsilon=1e-10)

            grads = opt.compute_gradients(loss)

            clipped_grads = []
            for grad, var in grads:
                self.summary.append(tf.summary.histogram(var.op.name, var))
                #print('grad name:'+str(var.op.name) + ' , ' + str(grad))
                self.summary.append(
                    tf.summary.histogram(var.op.name + '/grad', grad))
                grad = tf.clip_by_norm(grad, 10.0)
                clipped_grads.append([grad, var])
            self.train_op = opt.apply_gradients(clipped_grads)
            self.summary_op = tf.summary.merge(self.summary)

            self.saver = tf.train.Saver(max_to_keep=100)
    def create_variables(self):
        # compute action from a state: a* = argmax_a Q(s_t,a)
        with tf.name_scope("predict_actions"):
            # raw state representation
            self.states = tf.placeholder(tf.float32, (None, self.state_dim),
                                         name="states")
            # initialize Q network
            with tf.variable_scope("q_network"):
                self.q_outputs = self.value_network(self.states, self.player)
            # predict actions from Q network
            self.action_scores = tf.identity(self.q_outputs,
                                             name="action_scores")
            tf.summary.histogram("action_scores", self.action_scores)
            self.predicted_actions = tf.argmax(self.action_scores,
                                               dimension=1,
                                               name="predicted_actions")

        # estimate rewards using the next state: r(s_t,a_t) + argmax_a Q(s_{t+1}, a)
        with tf.name_scope("estimate_future_rewards"):
            self.next_states = tf.placeholder(tf.float32,
                                              (None, self.state_dim),
                                              name="next_states")
            self.next_state_mask = tf.placeholder(tf.float32, (None, ),
                                                  name="next_state_masks")

            if self.double_q_learning:
                # reuse Q network for action selection
                with tf.variable_scope("q_network", reuse=True):
                    self.q_next_outputs = self.value_network(
                        self.next_states, self.player)
                self.action_selection = tf.argmax(tf.stop_gradient(
                    self.q_next_outputs),
                                                  1,
                                                  name="action_selection")
                tf.histogram_summary("action_selection", self.action_selection)
                self.action_selection_mask = tf.one_hot(
                    self.action_selection, self.num_actions, 1, 0)
                # use target network for action evaluation
                with tf.variable_scope("target_network"):
                    self.target_outputs = self.value_network(
                        self.next_states, self.player) * tf.cast(
                            self.action_selection_mask, tf.float32)
                self.action_evaluation = tf.reduce_sum(self.target_outputs,
                                                       reduction_indices=[
                                                           1,
                                                       ])
                tf.histogram_summary("action_evaluation",
                                     self.action_evaluation)
                self.target_values = self.action_evaluation * self.next_state_mask
            else:
                # initialize target network
                with tf.variable_scope("target_network"):
                    self.target_outputs = self.value_network(
                        self.next_states, self.player)
                # compute future rewards
                self.next_action_scores = tf.stop_gradient(self.target_outputs)
                self.target_values = tf.reduce_max(self.next_action_scores,
                                                   reduction_indices=[
                                                       1,
                                                   ]) * self.next_state_mask
                tf.summary.histogram("next_action_scores",
                                     self.next_action_scores)

            self.rewards = tf.placeholder(tf.float32, (None, ), name="rewards")
            self.future_rewards = self.rewards + self.discount_factor * self.target_values

        # compute loss and gradients
        with tf.name_scope("compute_temporal_differences"):
            # compute temporal difference loss
            self.action_mask = tf.placeholder(tf.float32,
                                              (None, self.num_actions),
                                              name="action_mask")
            self.masked_action_scores = tf.reduce_sum(self.action_scores *
                                                      self.action_mask,
                                                      reduction_indices=[
                                                          1,
                                                      ])
            self.temp_diff = self.masked_action_scores - self.future_rewards
            self.td_loss = tf.reduce_mean(tf.square(self.temp_diff))
            # regularization loss
            q_network_variables = tf.get_collection(
                tf.GraphKeys.TRAINABLE_VARIABLES, scope="q_network")
            self.reg_loss = self.reg_param * tf.reduce_sum(
                [tf.reduce_sum(tf.square(x)) for x in q_network_variables])
            # compute total loss and gradients
            self.loss = self.td_loss + self.reg_loss
            gradients = self.optimizer.compute_gradients(self.loss)
            # clip gradients by norm
            for i, (grad, var) in enumerate(gradients):
                if grad is not None:
                    gradients[i] = (tf.clip_by_norm(grad,
                                                    self.max_gradient), var)
            # add histograms for gradients.
            for grad, var in gradients:
                tf.summary.histogram(var.name, var)
                if grad is not None:
                    tf.summary.histogram(var.name + '/gradients', grad)
            self.train_op = self.optimizer.apply_gradients(gradients)

        # update target network with Q network
        with tf.name_scope("update_target_network"):
            self.target_network_update = []
            # slowly update target network parameters with Q network parameters
            q_network_variables = tf.get_collection(
                tf.GraphKeys.TRAINABLE_VARIABLES, scope="q_network")
            target_network_variables = tf.get_collection(
                tf.GraphKeys.TRAINABLE_VARIABLES, scope="target_network")
            for v_source, v_target in zip(q_network_variables,
                                          target_network_variables):
                # this is equivalent to target = (1-alpha) * target + alpha * source
                update_op = v_target.assign_sub(self.target_update_rate *
                                                (v_target - v_source))
                self.target_network_update.append(update_op)
            self.target_network_update = tf.group(*self.target_network_update)

        # scalar summaries
        tf.summary.scalar("td_loss", self.td_loss)
        tf.summary.scalar("reg_loss", self.reg_loss)
        tf.summary.scalar("total_loss", self.loss)
        tf.summary.scalar("exploration", self.exploration)

        self.summarize = tf.summary.merge_all()
        self.no_op = tf.no_op()
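
As a quick sanity check of the soft-update comment above ("this is equivalent to target = (1-alpha) * target + alpha * source"), here is a plain NumPy sketch (not from the original source) showing that subtracting `rate * (target - source)` from the target is the same convex combination:

import numpy as np

rate = 0.01
target = np.array([1.0, 2.0])
source = np.array([3.0, 0.0])

# assign_sub form used in the graph above
updated = target - rate * (target - source)

# equivalent convex-combination form from the comment
assert np.allclose(updated, (1 - rate) * target + rate * source)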
Example #42
0
    def _clip(self, vec):
        if self.hmc_clip <= 0:
            return vec
        return tf.clip_by_norm(vec, self.hmc_clip, axes=[1])
Example #43
0
    def _euler_q(self, q, p, eps, M):
        p = tf.reshape(p, tf.shape(q))
        q_new = q + eps * (1 + tf.clip_by_norm(self.gx(p), 1)) / M
        return q_new
Example #44
0
        tf.compat.v1.summary.scalar(name='sum_step_loss', tensor=loss)

        total_loss = loss
        if params['reg_loss']:
            reg_loss = tf.reduce_sum(model.losses)
            total_loss += reg_loss
            tf.compat.v1.summary.scalar(name='regularization', tensor=reg_loss)

        tf.compat.v1.summary.scalar(name='total_loss', tensor=total_loss)
        tf.compat.v1.summary.scalar(name='lr', tensor=tf_vr_lr_in)

        opt = tf.compat.v1.train.AdamOptimizer(learning_rate=tf_vr_lr_in)

        if params['clip_grad']:
            gvs = opt.compute_gradients(total_loss)
            clipped_gvs = [(tf.clip_by_norm(grad, clip_norm=0.001), var)
                           for (grad, var) in gvs]
            train_step = opt.apply_gradients(clipped_gvs)

        else:
            train_step = opt.minimize(total_loss)

model.summary(print_fn=log.info)
sess.initialize_variables()

if params['pretf']:
    log.info('load a pre-trained model: {}'.format(params['pretf']))
    ld_model = keras.models.load_model(params['pretf'], compile=False)
    model.set_weights(ld_model.get_weights())

if params['inittf']:
Example #45
0
    embeddings = np.array(total_data[5])
    print("embedding_shape =", np.shape(embeddings))
    print("voca_len = ", len(voca))

    # model build
    cnn = model_build(sequence_length=len(x_train[0]), num_classes=len(y_train[0]),
                      vocab_size=len(voca), embeddings=embeddings)

    sess = tf.Session(config=tf.ConfigProto(gpu_options=tf.GPUOptions(allow_growth=True)))
    saver = tf.train.Saver(tf.global_variables())
    global_step = tf.Variable(0, trainable=False, name='global_step')
    optimizer = tf.train.AdamOptimizer(1e-3)
    grads_and_vars = optimizer.compute_gradients(cnn.loss)
    for i, (g, v) in enumerate(grads_and_vars):
        if g is not None:
            grads_and_vars[i] = (tf.clip_by_norm(g, l2_lambda()), v)

    train_op = optimizer.apply_gradients(grads_and_vars, global_step=global_step)

    ckpt = tf.train.get_checkpoint_state('./TRECmodel')
    devlen = int(len(x_train) * dev_ratio())
    test_accuracy_ave = 0
    test_accuracy_list = []
    each_accuracy = []
    each_loss = []
    if model_variation() == 0:
        print("Model : rand")
    elif model_variation() == 1:
        print("Model : static")
    elif model_variation() == 2:
        print("Model : non-static")
Example #46
0
    def train(self, trial_batch_generator, train_params={}):
        """ Train the network.

        Arguments:
            trial_batch_generator (:class:`~psychrnn.tasks.task.Task` object or *Generator[tuple, None, None]*): the task to train on, or the task to train on's batch_generator. If a task is passed in, task.:func:`batch_generator` () will be called to get the generator for the task to train on.
            train_params (dict, optional): Dictionary of training parameters containing the following possible keys:

                :Dictionary Keys: 
                    * **learning_rate** (*float, optional*) -- Sets the learning rate if the default optimizer is used. Default: .001
                    * **training_iters** (*int, optional*) -- Number of iterations to train for. Default: 50000.
                    * **loss_epoch** (*int, optional*) -- Compute and record loss every 'loss_epoch' epochs. Default: 10.
                    * **verbosity** (*bool, optional*) -- If true, prints information as training progresses. Default: True.
                    * **save_weights_path** (*str, optional*) -- Where to save the model after training. Default: None
                    * **save_training_weights_epoch** (*int, optional*) -- Save training weights every 'save_training_weights_epoch' epochs. Weights only actually saved if :data:`training_weights_path` is set. Default: 100.
                    * **training_weights_path** (*str, optional*) -- What directory to save training weights into as training progresses. Default: None.               
                    * **curriculum** (`~psychrnn.backend.curriculum.Curriculum` *object, optional*) -- Curriculum to train on. If a curriculum object is provided, it overrides the trial_batch_generator argument. Default: None.
                    * **optimizer** (`tf.compat.v1.train.Optimizer <https://www.tensorflow.org/api_docs/python/tf/compat/v1/train/Optimizer>`_ *object, optional*) -- What optimizer to use to compute gradients. Default: `tf.train.AdamOptimizer <https://www.tensorflow.org/api_docs/python/tf/compat/v1/train/AdamOptimizer>`_ (learning_rate=:data:`train_params`['learning_rate']` ).
                    * **clip_grads** (*bool, optional*) -- If true, clip gradients by norm 1. Default: True
                    * **fixed_weights** (*dict, optional*) -- By default all weights are allowed to train unless :data:`fixed_weights` or :data:`W_rec_train`, :data:`W_in_train`, or :data:`W_out_train` are set. Default: None. Dictionary of weights to fix (not allow to train) with the following optional keys:

                        Fixed Weights Dictionary Keys (in case of :class:`~psychrnn.backend.models.basic.Basic` and :class:`~psychrnn.backend.models.basic.BasicScan` implementations)
                            * **W_in** (*ndarray(dtype=bool, shape=(:attr:`N_rec`, :attr:`N_in` *)), optional*) -- True for input weights that should be fixed during training.
                            * **W_rec** (*ndarray(dtype=bool, shape=(:attr:`N_rec`, :attr:`N_rec` *)), optional*) -- True for recurrent weights that should be fixed during training.
                            * **W_out** (*ndarray(dtype=bool, shape=(:attr:`N_out`, :attr:`N_rec` *)), optional*) -- True for output weights that should be fixed during training.

                        :Note:
                            In general, any key in the dictionary output by :func:`get_weights` can have a key in the fixed_weights matrix, however fixed_weights will only meaningfully apply to trainable matrices.

                    * **performance_cutoff** (*float*) -- If :data:`performance_measure` is not ``None``, training stops as soon as performance_measure surpasses the performance_cutoff. Default: None.
                    * **performance_measure** (*function*) -- Function to calculate the performance of the network using custom criteria. Default: None.

                        :Arguments:
                            * **trial_batch** (*ndarray(dtype=float, shape =(*:attr:`N_batch`, :attr:`N_steps`, :attr:`N_out` *))*): Task stimuli for :attr:`N_batch` trials.
                            * **trial_y** (*ndarray(dtype=float, shape =(*:attr:`N_batch`, :attr:`N_steps`, :attr:`N_out` *))*): Target output for the network on :attr:`N_batch` trials given the :data:`trial_batch`.
                            * **output_mask** (*ndarray(dtype=bool, shape =(*:attr:`N_batch`, :attr:`N_steps`, :attr:`N_out` *))*): Output mask for :attr:`N_batch` trials. True when the network should aim to match the target output, False when the target output can be ignored.
                            * **output** (*ndarray(dtype=bool, shape =(*:attr:`N_batch`, :attr:`N_steps`, :attr:`N_out` *))*): Output to compute the accuracy of. ``output`` as returned by :func:`psychrnn.backend.rnn.RNN.test`.
                            * **epoch** (*int*): Current training epoch (e.g. perhaps the performance_measure is calculated differently early on vs late in training)
                            * **losses** (*list of float*): List of losses from the beginning of training until the current epoch.
                            * **verbosity** (*bool*): Passed in from :data:`train_params`.

                        :Returns:
                            *float* 

                            Performance, greater when the performance is better.
        Returns:
            tuple:
            * **losses** (*list of float*) -- List of losses, computed every :data:`loss_epoch` epochs during training.
            * **training_time** (*float*) -- Time spent training.
            * **initialization_time** (*float*) -- Time spent initializing the network and preparing to train.

        """
        if not self.is_built:
            self.build()

        t0 = time()
        # --------------------------------------------------
        # Extract params
        # --------------------------------------------------
        learning_rate = train_params.get('learning_rate', .001)
        training_iters = train_params.get('training_iters', 50000)
        loss_epoch = train_params.get('loss_epoch', 10)
        verbosity = train_params.get('verbosity', True)
        save_weights_path = train_params.get('save_weights_path', None)
        save_training_weights_epoch = train_params.get(
            'save_training_weights_epoch', 100)
        training_weights_path = train_params.get('training_weights_path', None)
        curriculum = train_params.get('curriculum', None)
        optimizer = train_params.get(
            'optimizer',
            tf.compat.v1.train.AdamOptimizer(learning_rate=learning_rate))
        clip_grads = train_params.get('clip_grads', True)
        fixed_weights = train_params.get(
            'fixed_weights', None
        )  # array of zeroes and ones. One indicates to pin and not train that weight.
        performance_cutoff = train_params.get('performance_cutoff', None)
        performance_measure = train_params.get('performance_measure', None)

        if (performance_cutoff is not None
                and performance_measure is None) or (performance_cutoff is None
                                                     and performance_measure
                                                     is not None):
            raise UserWarning(
                "Training will not be cut off based on performance. Make sure both performance_measure and performance_cutoff are defined."
            )

        if curriculum is not None:
            trial_batch_generator = curriculum.get_generator_function()

        if not isgenerator(trial_batch_generator):
            trial_batch_generator = trial_batch_generator.batch_generator()

        # --------------------------------------------------
        # Make weights folder if it doesn't already exist.
        # --------------------------------------------------
        if save_weights_path is not None:
            if path.dirname(save_weights_path) != "" and not path.exists(
                    path.dirname(save_weights_path)):
                makedirs(path.dirname(save_weights_path))

        # --------------------------------------------------
        # Make train weights folder if it doesn't already exist.
        # --------------------------------------------------
        if training_weights_path is not None:
            if path.dirname(training_weights_path) != "" and not path.exists(
                    path.dirname(training_weights_path)):
                makedirs(path.dirname(training_weights_path))

        # --------------------------------------------------
        # Compute gradients
        # --------------------------------------------------
        grads = optimizer.compute_gradients(self.reg_loss)

        # --------------------------------------------------
        # Fixed Weights
        # --------------------------------------------------
        if fixed_weights is not None:
            for i in range(len(grads)):
                (grad, var) = grads[i]
                name = var.name[len(self.name) + 1:-2]
                if name in fixed_weights.keys():
                    grad = tf.multiply(grad, (1 - fixed_weights[name]))
                    grads[i] = (grad, var)

        # --------------------------------------------------
        # Clip gradients
        # --------------------------------------------------
        if clip_grads:
            grads = [(tf.clip_by_norm(grad, 1.0), var) if grad is not None else
                     (grad, var) for grad, var in grads]

        # --------------------------------------------------
        # Call the optimizer and initialize variables
        # --------------------------------------------------
        optimize = optimizer.apply_gradients(grads)
        self.sess.run(tf.compat.v1.global_variables_initializer())
        self.is_initialized = True

        # --------------------------------------------------
        # Record training time for performance benchmarks
        # --------------------------------------------------
        t1 = time()

        # --------------------------------------------------
        # Training loop
        # --------------------------------------------------
        epoch = 1
        batch_size = next(trial_batch_generator)[0].shape[0]
        losses = []
        if performance_cutoff is not None:
            performance = performance_cutoff - 1

        while (epoch - 1) * batch_size < training_iters and (
                performance_cutoff is None
                or performance < performance_cutoff):
            batch_x, batch_y, output_mask, _ = next(trial_batch_generator)
            self.sess.run(optimize,
                          feed_dict={
                              self.x: batch_x,
                              self.y: batch_y,
                              self.output_mask: output_mask
                          })
            # --------------------------------------------------
            # Output batch loss
            # --------------------------------------------------
            if epoch % loss_epoch == 0:
                reg_loss = self.sess.run(self.reg_loss,
                                         feed_dict={
                                             self.x: batch_x,
                                             self.y: batch_y,
                                             self.output_mask: output_mask
                                         })
                losses.append(reg_loss)
                if verbosity:
                    print("Iter " + str(epoch * batch_size) + ", Minibatch Loss= " + \
                          "{:.6f}".format(reg_loss))

            # --------------------------------------------------
            # Allow for curriculum learning
            # --------------------------------------------------
            if curriculum is not None and epoch % curriculum.metric_epoch == 0:
                trial_batch, trial_y, output_mask, _ = next(
                    trial_batch_generator)
                output, _ = self.test(trial_batch)
                if curriculum.metric_test(trial_batch, trial_y, output_mask,
                                          output, epoch, losses, verbosity):
                    if curriculum.stop_training:
                        break
                    trial_batch_generator = curriculum.get_generator_function()

            # --------------------------------------------------
            # Save intermediary weights
            # --------------------------------------------------
            if epoch % save_training_weights_epoch == 0:
                if training_weights_path is not None:
                    self.save(training_weights_path + str(epoch))
                    if verbosity:
                        print("Training weights saved in file: %s" %
                              training_weights_path + str(epoch))

            # ---------------------------------------------------
            # Update performance value if necessary
            # ---------------------------------------------------
            if performance_measure is not None:
                trial_batch, trial_y, output_mask, _ = next(
                    trial_batch_generator)
                output, _ = self.test(trial_batch)
                performance = performance_measure(trial_batch, trial_y,
                                                  output_mask, output, epoch,
                                                  losses, verbosity)
                if verbosity:
                    print("performance: " + str(performance))
            epoch += 1

        t2 = time()
        if verbosity:
            print("Optimization finished!")

        # --------------------------------------------------
        # Save final weights
        # --------------------------------------------------
        if save_weights_path is not None:
            self.save(save_weights_path)
            if verbosity:
                print("Model saved in file: %s" % save_weights_path)

        # --------------------------------------------------
        # Return losses, training time, initialization time
        # --------------------------------------------------
        return losses, (t2 - t1), (t1 - t0)
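
For reference, a minimal usage sketch of this training API (not part of the original example). It assumes psychrnn is installed, uses the standard psychrnn example task and model classes, and passes only train_params keys documented in the docstring above.

from psychrnn.tasks.perceptual_discrimination import PerceptualDiscrimination
from psychrnn.backend.models.basic import Basic

# Build a task and a basic RNN model (assumed example classes from psychrnn).
task = PerceptualDiscrimination(dt=10, tau=100, T=2000, N_batch=50)
network_params = task.get_task_params()
network_params['name'] = 'basic_rnn'
network_params['N_rec'] = 50
model = Basic(network_params)

# Train with a subset of the train_params keys described above.
train_params = {
    'learning_rate': 0.001,   # used by the default Adam optimizer
    'training_iters': 50000,
    'loss_epoch': 10,
    'clip_grads': True,       # each gradient is clipped by norm 1, as in the code above
}
losses, training_time, initialization_time = model.train(task, train_params)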
Example #47
0
    def create_variables(self):
        # create the target network T as a copy of the source network N
        self.target_q_network = self.q_network.copy(scope="target_network")

        # compute the control action
        # FOR REGULAR ACTION SCORE COMPUTATION
        with tf.name_scope("taking_action"):
            # input: the state vector
            self.observation = tf.placeholder(tf.float32,
                                              (None, self.observation_size),
                                              name="observation")
            # compute the estimated value score of each action
            self.action_scores = tf.identity(self.q_network(self.observation),
                                             name="action_scores")
            tf.histogram_summary("action_scores", self.action_scores)
            # take the action with the highest score
            self.predicted_actions = tf.argmax(self.action_scores,
                                               dimension=1,
                                               name="predicted_actions")

        # estimate the future reward
        with tf.name_scope("estimating_future_rewards"):
            # FOR PREDICTING TARGET FUTURE REWARDS
            # input: the next states
            self.next_observation = tf.placeholder(
                tf.float32, (None, self.observation_size),
                name="next_observation")
            # input: the next-state masks
            self.next_observation_mask = tf.placeholder(
                tf.float32, (None, ), name="next_observation_mask")
            # value estimates
            self.next_action_scores = tf.stop_gradient(
                self.target_q_network(self.next_observation))
            tf.histogram_summary("target_action_scores",
                                 self.next_action_scores)
            # input: the rewards
            self.rewards = tf.placeholder(tf.float32, (None, ), name="rewards")
            # take the maximum action-value estimates
            target_values = tf.identity(
                tf.reduce_max(self.next_action_scores, reduction_indices=[
                    1,
                ]) * self.next_observation_mask,
                name="target_values")
            # r + DF * MAX(Q,s); see the Q-learning article on Wikipedia
            #self.future_rewards            = self.rewards + self.discount_rate * target_values
            self.future_rewards = tf.identity(
                self.rewards + self.discount_rate * target_values,
                name="future_rewards")

        # training of network N
        with tf.name_scope("q_value_precition"):
            # FOR PREDICTION ERROR
            # input: action masks for the training examples
            self.action_mask = tf.placeholder(tf.float32,
                                              (None, self.num_actions),
                                              name="action_mask")
            # compute the action values for the training examples
            self.masked_action_scores = tf.reduce_sum(
                self.action_scores * self.action_mask,
                reduction_indices=[
                    1,
                ],
                name="masked_action_scores")
            # differences between the current action values and the future rewards
            # - (r + DF * MAX(Q,s) - Q[s',a'])
            #temp_diff                       = self.masked_action_scores - self.future_rewards
            temp_diff = tf.identity(self.masked_action_scores -
                                    self.future_rewards,
                                    name="temp_diff")
            # the key step of training the network:
            # RMSProp minimizes the mean of the squared differences above
            self.prediction_error = tf.reduce_mean(tf.square(temp_diff),
                                                   name="prediction_error")
            # RMSProp: the first step is to compute the gradients
            gradients = self.optimizer.compute_gradients(self.prediction_error)
            #def get_zero(): return tf.constant(0.0)
            #def get_perror(): return self.prediction_error
            #gradients                       = self.optimizer.compute_gradients(tf.cond(tf.is_nan(self.prediction_error), get_zero, get_perror))
            for i, (grad, var) in enumerate(gradients):
                if grad is not None:
                    gradients[i] = (tf.clip_by_norm(grad, 5), var)
            # Add histograms for gradients.
            for grad, var in gradients:
                tf.histogram_summary(var.name, var)
                if grad is not None:
                    tf.histogram_summary(var.name + '/gradients', grad)
            # the second step is to update the network parameters
            self.train_op = self.optimizer.apply_gradients(gradients,
                                                           name="train_op")

        # this is where the target network T is adjusted
        # T = (1-alpha)*T + alpha*N
        # UPDATE TARGET NETWORK
        with tf.name_scope("target_network_update"):
            self.target_network_update = []
            for v_source, v_target in zip(self.q_network.variables(),
                                          self.target_q_network.variables()):
                # this is equivalent to target = (1-alpha) * target + alpha * source
                update_op = v_target.assign_sub(
                    self.target_network_update_rate * (v_target - v_source))
                self.target_network_update.append(update_op)
            self.target_network_update = tf.group(*self.target_network_update,
                                                  name="target_network_update")

        # summaries
        tf.scalar_summary("prediction_error", self.prediction_error)

        self.summarize = tf.merge_all_summaries()
        self.no_op1 = tf.no_op()
Example #48
0
    session_conf.gpu_options.allow_growth = True
    sess = tf.Session(config=session_conf)

    with sess.as_default():

        stan_reader = StanfordReader(max_entities=5, batch_size=FLAGS.batch_size)

        # Define Training procedure
        global_step = tf.Variable(0, name="global_step", trainable=False)
        optimizer = tf.train.AdamOptimizer(learning_rate = FLAGS.learning_rate)
        # aggregation_method is an experimental feature introduced for faster gradient computation
        grads_and_vars = optimizer.compute_gradients(stan_reader.loss, aggregation_method = 2)
        clipped_grads = []
        for g, v in grads_and_vars:
            if g is not None:
                clipped = tf.clip_by_norm(g, clip_norm=10.)
                clipped_grads.append((clipped, v))

        train_op = optimizer.apply_gradients(clipped_grads, global_step=global_step)

        # Keep track of gradient values and sparsity (optional)
        grad_summaries = []
        for g, v in grads_and_vars:
            if g is not None:
                grad_hist_summary = tf.histogram_summary("{}/grad/hist".format(v.name), g)
                sparsity_summary = tf.scalar_summary("{}/grad/sparsity".format(v.name), tf.nn.zero_fraction(g))
                grad_summaries.append(grad_hist_summary)
                grad_summaries.append(sparsity_summary)
        grad_summaries_merged = tf.merge_summary(grad_summaries)

        # Output directory for models and summaries
Example #49
0
    def __init__(self, mode="train"):
        # Load vocabulary
        self.char2idx, self.idx2char = load_vocab()

        # Set phase
        is_training = (mode == "train")

        # Graph
        # Data Feeding
        # x: Text. (N, Tx)
        # y: Reduced melspectrogram. (N, Ty//r, n_mels*r)
        # z: Magnitude. (N, Ty, n_fft//2+1)
        if mode == "train":
            self.x, self.y, self.z, self.fnames, self.num_batch = get_batch()
        elif mode == "eval":
            self.x = tf.placeholder(tf.int32, shape=(None, None))
            self.y = tf.placeholder(tf.float32,
                                    shape=(None, None, hp.n_mels * hp.r))
            self.z = tf.placeholder(tf.float32,
                                    shape=(None, None, 1 + hp.n_fft // 2))
            self.fnames = tf.placeholder(tf.string, shape=(None, ))
        else:  # Synthesize
            self.x = tf.placeholder(tf.int32, shape=(None, None))
            self.y = tf.placeholder(tf.float32,
                                    shape=(None, None, hp.n_mels * hp.r))

        # Get encoder/decoder inputs
        self.encoder_inputs = embed(self.x, len(hp.vocab),
                                    hp.embed_size)  # (N, T_x, E)
        self.decoder_inputs = tf.concat(
            (tf.zeros_like(self.y[:, :1, :]), self.y[:, :-1, :]),
            1)  # (N, Ty/r, n_mels*r)
        self.decoder_inputs = self.decoder_inputs[:, :, -hp.n_mels:]  # feed last frames only (N, Ty/r, n_mels)

        # Networks
        with tf.variable_scope("net"):
            # Encoder
            self.memory = encoder(self.encoder_inputs,
                                  is_training=is_training)  # (N, T_x, E)

            # Decoder1
            self.y_hat, self.alignments = decoder1(
                self.decoder_inputs, self.memory,
                is_training=is_training)  # (N, T_y//r, n_mels*r)
            # Decoder2 or postprocessing
            self.z_hat = decoder2(
                self.y_hat,
                is_training=is_training)  # (N, T_y//r, (1+n_fft//2)*r)

        # monitor
        self.audio = tf.py_func(spectrogram2wav, [self.z_hat[0]], tf.float32)

        if mode in ("train", "eval"):
            # Loss
            self.loss1 = tf.reduce_mean(tf.abs(self.y_hat - self.y))
            self.loss2 = tf.reduce_mean(tf.abs(self.z_hat - self.z))
            self.loss = self.loss1 + self.loss2

            # Training Scheme
            self.global_step = tf.Variable(0,
                                           name='global_step',
                                           trainable=False)
            self.lr = learning_rate_decay(hp.lr, global_step=self.global_step)
            self.optimizer = tf.train.AdamOptimizer(learning_rate=self.lr)

            ## gradient clipping
            self.gvs = self.optimizer.compute_gradients(self.loss)
            self.clipped = []
            for grad, var in self.gvs:
                grad = tf.clip_by_norm(grad, 5.)
                self.clipped.append((grad, var))
            self.train_op = self.optimizer.apply_gradients(
                self.clipped, global_step=self.global_step)

            # Summary
            tf.summary.scalar('{}/loss1'.format(mode), self.loss1)
            tf.summary.scalar('{}/loss'.format(mode), self.loss)
            tf.summary.scalar('{}/lr'.format(mode), self.lr)

            tf.summary.image("{}/mel_gt".format(mode),
                             tf.expand_dims(self.y, -1),
                             max_outputs=1)
            tf.summary.image("{}/mel_hat".format(mode),
                             tf.expand_dims(self.y_hat, -1),
                             max_outputs=1)
            tf.summary.image("{}/mag_gt".format(mode),
                             tf.expand_dims(self.z, -1),
                             max_outputs=1)
            tf.summary.image("{}/mag_hat".format(mode),
                             tf.expand_dims(self.z_hat, -1),
                             max_outputs=1)

            tf.summary.audio("{}/sample".format(mode),
                             tf.expand_dims(self.audio, 0), hp.sr)
            self.merged = tf.summary.merge_all()
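The per-variable clipping loop above assumes every variable receives a gradient. A minimal sketch of the same step with a guard for None gradients; the names `optimizer`, `loss`, and `global_step` stand in for the attributes built above, and the threshold 5. mirrors the code:

# Hedged sketch, not from the source: clip each gradient to norm 5 and skip
# variables that receive no gradient.
gvs = optimizer.compute_gradients(loss)
clipped = [(tf.clip_by_norm(g, 5.), v) if g is not None else (g, v)
           for g, v in gvs]
train_op = optimizer.apply_gradients(clipped, global_step=global_step)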
Example #50
    def __init__(self,
                 batch_size,
                 vocab_size,
                 sentence_size,
                 memory_size,
                 embedding_size,
                 hops=3,
                 max_grad_norm=40.0,
                 nonlin=None,
                 initializer=tf.random_normal_initializer(stddev=0.1),
                 encoding=position_encoding,
                 session=tf.Session(config=tf.ConfigProto(
                     gpu_options=tf_gpu_options)),
                 l2=0.02,
                 lr=0.01,
                 epsilon=1e-8,
                 restoreLoc=None,
                 name='MemN2N'):
        """Creates an End-To-End Memory Network

        Args:
            batch_size: The size of the batch.

            vocab_size: The size of the vocabulary (should include the nil word). The nil word
            one-hot encoding should be 0.

            sentence_size: The max size of a sentence in the data. All sentences should be padded
            to this length. If padding is required it should be done with nil one-hot encoding (0).

            memory_size: The max size of the memory. Since Tensorflow currently does not support jagged arrays
            all memories must be padded to this length. If padding is required, the extra memories should be
            empty memories; memories filled with the nil word ([0, 0, 0, ......, 0]).

            embedding_size: The size of the word embedding.

            hops: The number of hops. A hop consists of reading and addressing a memory slot.
            Defaults to `3`.

            max_grad_norm: Maximum L2 norm clipping value. Defaults to `40.0`.

            nonlin: Non-linearity. Defaults to `None`.

            initializer: Weight initializer. Defaults to `tf.random_normal_initializer(stddev=0.1)`.

            lr: Learning rate of the internal Adam optimizer. Defaults to `0.01`.

            epsilon: Epsilon of the internal Adam optimizer. Defaults to `1e-8`.

            l2: Weight on the L2 regularization terms collected in the `reg_loss` collection. Defaults to `0.02`.

            restoreLoc: Optional checkpoint path to restore the session from instead of
            initializing variables. Defaults to `None`.

            encoding: A function returning a 2D Tensor (sentence_size, embedding_size). Defaults to `position_encoding`.

            session: Tensorflow Session the model is run with. Defaults to `tf.Session()`.

            name: Name of the End-To-End Memory Network. Defaults to `MemN2N`.
        """

        self._batch_size = batch_size
        self._vocab_size = vocab_size
        self._sentence_size = sentence_size
        self._memory_size = memory_size
        self._embedding_size = embedding_size
        self._hops = hops
        self._max_grad_norm = max_grad_norm
        self._nonlin = nonlin
        self._init = initializer
        self._opt = tf.train.AdamOptimizer(learning_rate=lr, epsilon=epsilon)
        self._name = name
        self._l2 = l2

        self._build_inputs()
        self._build_vars()
        self._encoding = tf.constant(encoding(self._sentence_size,
                                              self._embedding_size),
                                     name="encoding")

        # cross entropy
        logits = self._inference(self._stories,
                                 self._queries)  # (batch_size, vocab_size)
        cross_entropy = tf.nn.softmax_cross_entropy_with_logits_v2(
            logits=logits,
            labels=tf.cast(self._answers, tf.float32),
            name="cross_entropy")
        cross_entropy_sum = tf.reduce_sum(cross_entropy,
                                          name="cross_entropy_sum")

        # loss op
        reg_loss = self._l2 * tf.add_n(tf.get_collection('reg_loss'))
        loss_op = cross_entropy_sum + reg_loss

        loss_op_summary = tf.summary.scalar("loss", loss_op)

        ema = tf.train.ExponentialMovingAverage(decay=0.99)
        self.update_loss_ema = ema.apply([loss_op])
        loss_ema = ema.average(loss_op)
        self.loss_ema_op = tf.summary.scalar('batch_loss_ema', loss_ema)

        # gradient pipeline
        grads_and_vars = self._opt.compute_gradients(loss_op)
        grads_and_vars = [(tf.clip_by_norm(g, self._max_grad_norm), v)
                          for g, v in grads_and_vars]
        grads_and_vars = [(add_gradient_noise(g), v)
                          for g, v in grads_and_vars]
        nil_grads_and_vars = []
        for g, v in grads_and_vars:
            if v.name in self._nil_vars:
                nil_grads_and_vars.append((zero_nil_slot(g), v))
            else:
                nil_grads_and_vars.append((g, v))
        train_op = self._opt.apply_gradients(nil_grads_and_vars,
                                             name="train_op")

        # predict ops
        predict_op = tf.argmax(logits, 1, name="predict_op")
        predict_proba_op = tf.nn.softmax(logits, name="predict_proba_op")
        predict_log_proba_op = tf.log(predict_proba_op,
                                      name="predict_log_proba_op")

        # validation accuracy ops
        self.val_acc_op = self._get_val_acc(predict_op, self._val_answers)
        self.val_acc_summary = tf.summary.scalar("val_acc", self.val_acc_op)

        # assign ops
        self.loss_op = loss_op
        self.predict_op = predict_op
        self.predict_proba_op = predict_proba_op
        self.predict_log_proba_op = predict_log_proba_op
        self.train_op = train_op
        self.loss_op_summary = loss_op_summary

        # Summaries
        self.merged = tf.summary.merge_all()

        self._sess = session

        if restoreLoc is not None:
            saver = tf.train.Saver()
            saver.restore(self._sess, restoreLoc)
        else:
            init_op = tf.global_variables_initializer()
            self._sess.run(init_op)
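The helpers add_gradient_noise and zero_nil_slot are called in the gradient pipeline above but are not defined in this excerpt. A plausible sketch consistent with how they are used (the original implementations may differ):

def add_gradient_noise(t, stddev=1e-3, name=None):
    """Adds Gaussian noise to a gradient tensor, a common MemN2N training trick."""
    t = tf.convert_to_tensor(t, name="t")
    gn = tf.random_normal(tf.shape(t), stddev=stddev)
    return tf.add(t, gn, name=name)

def zero_nil_slot(t, name=None):
    """Zeros out the first (nil-word) row of a gradient so the nil embedding stays zero."""
    t = tf.convert_to_tensor(t, name="t")
    s = tf.shape(t)[1]
    z = tf.zeros(tf.stack([1, s]))
    return tf.concat([z, tf.slice(t, [1, 0], [-1, -1])], 0, name=name)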
Example #51
def train():
    colorlog.basicConfig(
        filename=None,
        level=logging.INFO,
        format="%(log_color)s[%(levelname)s:%(asctime)s]%(reset)s %(message)s",
        datefmt="%Y-%m-%d %H:%M:%S")

    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.95)
    with tf.Session(config=tf.ConfigProto(allow_soft_placement=True,
                                          log_device_placement=False,
                                          gpu_options=gpu_options)) as sess:
        global_step = tf.get_variable('global_step', [],
                                      initializer=tf.constant_initializer(0),
                                      trainable=False)
        num_examples_per_epoch, tower_img_embedding, tower_context_length, \
            tower_caption_length, tower_context_id, tower_caption_id, \
            tower_answer_id, tower_context_mask, \
            tower_caption_mask = enqueue(False)

        # Calculate the learning rate schedule.
        num_batches_per_epoch = (num_examples_per_epoch / FLAGS.batch_size /
                                 FLAGS.num_gpus)
        decay_steps = int(num_batches_per_epoch * NUM_EPOCHS_PER_DECAY)

        # Decay the learning rate exponentially based on the number of steps.
        lr = tf.train.exponential_decay(FLAGS.init_lr,
                                        global_step,
                                        decay_steps,
                                        LEARNING_RATE_DECAY_FACTOR,
                                        staircase=True)

        # Create an optimizer that performs gradient descent.
        opt = tf.train.AdamOptimizer(lr)

        # Calculate the gradients for each model tower.
        tower_grads = []
        with tf.variable_scope(tf.get_variable_scope()) as scope:
            for i in xrange(FLAGS.num_gpus):
                with tf.device('/gpu:%d' % i):
                    with tf.name_scope('%s_%d' % (TOWER_NAME, i)) as scope:
                        # Calculate the loss for one tower of the CIFAR model. This function
                        # constructs the entire CIFAR model but shares the variables across
                        # all towers.
                        inputs = [
                            tower_img_embedding[i], tower_context_length[i],
                            tower_caption_length[i], tower_context_id[i],
                            tower_caption_id[i], tower_answer_id[i],
                            tower_context_mask[i], tower_caption_mask[i]
                        ]
                        loss = _tower_loss(inputs, scope)

                        # Reuse variables for the next tower.
                        tf.get_variable_scope().reuse_variables()

                        # Retain the summaries from the final tower.
                        summaries = tf.get_collection(tf.GraphKeys.SUMMARIES,
                                                      scope)

                        # Calculate the gradients for the batch of data on this CIFAR tower.
                        grads = opt.compute_gradients(loss)

                        # Keep track of the gradients across all towers.
                        tower_grads.append(grads)

        # We must calculate the mean of each gradient. Note that this is the
        # synchronization point across all towers.
        grads = _average_gradients(tower_grads)

        # Add a summary to track the learning rate.
        summaries.append(tf.summary.scalar('learning_rate', lr))
        clipped_grads_and_vars = [(tf.clip_by_norm(gv[0], \
            FLAGS.max_grad_norm), gv[1]) for gv in grads]
        # Apply the gradients to adjust the shared variables.
        apply_gradient_op = opt.apply_gradients(clipped_grads_and_vars,
                                                global_step=global_step)
        # Create a saver.
        saver = tf.train.Saver(tf.global_variables(), max_to_keep=200)

        # Build the summary operation from the last tower summaries.
        summary_op = tf.summary.merge(summaries)

        # Build an initialization operation to run below.
        init = tf.global_variables_initializer()

        sess.run(init)

        ckpt = tf.train.get_checkpoint_state(FLAGS.train_dir)
        if ckpt and ckpt.model_checkpoint_path:
            # Restores from checkpoint
            saver.restore(sess, ckpt.model_checkpoint_path)
        # Start the queue runners.
        tf.train.start_queue_runners(sess=sess)
        summary_writer = tf.summary.FileWriter(FLAGS.train_dir, sess.graph)

        for step in xrange(FLAGS.max_steps):
            start_time = time.time()
            _, loss_value = sess.run([apply_gradient_op, loss])
            duration = time.time() - start_time
            assert not np.isnan(loss_value), 'Model diverged with loss = NaN'

            if (step + 1) % 10 == 0:
                num_examples_per_step = FLAGS.batch_size * FLAGS.num_gpus
                examples_per_sec = num_examples_per_step / duration
                sec_per_batch = duration / FLAGS.num_gpus

                format_str = (
                    '%s: step %d, loss = %.2f (%.1f examples/sec; %.3f '
                    'sec/batch)')
                c_g_step = int(global_step.eval(session=sess))
                print(format_str % (datetime.now(), c_g_step, loss_value,
                                    examples_per_sec, sec_per_batch))

            if (step + 1) % 25 == 0:
                summary_str = sess.run(summary_op)
                summary_writer.add_summary(summary_str, c_g_step)

            # Save the model checkpoint periodically.
            if (step + 1) % 500 == 0 or (step + 1) == FLAGS.max_steps:
                checkpoint_path = os.path.join(FLAGS.train_dir, 'model.ckpt')
                saver.save(sess, checkpoint_path, global_step=c_g_step)
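The helper _average_gradients used above is not shown in this excerpt; a sketch following the standard multi-tower averaging pattern (it assumes every tower produced a gradient for every variable; the original helper may differ):

def _average_gradients(tower_grads):
    """Averages gradients variable-wise across towers.

    tower_grads: list (one entry per tower) of lists of (gradient, variable) pairs.
    Returns a single list of (averaged_gradient, variable) pairs.
    """
    average_grads = []
    for grad_and_vars in zip(*tower_grads):
        # grad_and_vars is ((grad_gpu0, var), ..., (grad_gpuN, var)) for one variable.
        grads = [tf.expand_dims(g, 0) for g, _ in grad_and_vars]
        grad = tf.reduce_mean(tf.concat(grads, 0), 0)
        average_grads.append((grad, grad_and_vars[0][1]))
    return average_grads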
Example #52
def build_train(make_obs_ph,
                q_func,
                num_actions,
                optimizer_f,
                grad_norm_clipping=None,
                gamma=1.0,
                scope="setdeepq",
                reuse=None,
                test_eps=0.05,
                lr_init=0.001,
                lr_period_steps=250000,
                tau=0.05):
    """Creates the train function:

    Parameters
    ----------
    make_obs_ph: str -> tf.placeholder or TfInput
        a function that takes a name and creates a placeholder of input with that name
    q_func: (tf.Variable, int, str, bool) -> tf.Variable
        the model that takes the following inputs:
            observation_in: object
                the output of observation placeholder
            num_actions: int
                number of actions
            scope: str
            reuse: bool
                should be passed to outer variable scope
        and returns a tensor of shape (batch_size, num_actions) with values of every action.
    num_actions: int
        number of actions
    optimizer_f: learning_rate -> tf.train.Optimizer
        a callable that builds the optimizer used for the Q-learning objective.
    grad_norm_clipping: float or None
        clip gradient norms to this value. If None no clipping is performed.
    gamma: float
        discount rate.
    scope: str or VariableScope
        optional scope for variable_scope.
    reuse: bool or None
        whether or not the variables should be reused. To be able to reuse the scope must be given.
    test_eps: float
        epsilon used by the greedy evaluation policy.
    lr_init : float
        initial learning rate
    lr_period_steps : int
        period (in steps) of the cosine learning rate schedule
    tau : float
        parameter for the soft target network update. tau <= 1.0 and 1.0 for
        the hard update.

    Returns
    -------
    act: (tf.Variable, bool, float) -> tf.Variable
        function to select an action given an observation.
        See the top of the file for details.
    train: (object, np.array, np.array, object, np.array, np.array) -> np.array
        optimize the error in Bellman's equation.
        See the top of the file for details.
    update_target: () -> ()
        copy the parameters from optimized Q function to the target Q function.
        See the top of the file for details.
    debug: {str: function}
        a bunch of functions to print debug data like q_values.
    """
    # Build action graphs
    act_f = build_act(make_obs_ph,
                      q_func,
                      num_actions,
                      scope=scope,
                      reuse=reuse)

    act_greedy = build_act_greedy(make_obs_ph,
                                  q_func,
                                  num_actions,
                                  scope=scope,
                                  reuse=True,
                                  eps=test_eps)

    with tf.compat.v1.variable_scope(scope, reuse=reuse):
        # set up placeholders
        obs_t_input = make_obs_ph("obs_t")
        act_t_ph = tf.compat.v1.placeholder(tf.int32, [None], name="action")
        rew_t_ph = tf.compat.v1.placeholder(tf.float32, [None], name="reward")
        obs_tp1_input = make_obs_ph("obs_tp1")
        done_mask_ph = tf.compat.v1.placeholder(tf.float32, [None],
                                                name="done")
        importance_weights_ph = tf.compat.v1.placeholder(tf.float32, [None],
                                                         name="weight")
        iteration = tf.compat.v1.placeholder(tf.float32, name="iteration")

        # Cosine learning rate adjustment
        lr = tf.Variable(float(lr_init),
                         trainable=False,
                         dtype=tf.float32,
                         name='lr')
        lr = tf.clip_by_value(
            0.0005 * tf.math.cos(math.pi * iteration / lr_period_steps) +
            0.000501, 1e-6, 1e-3)
        optimizer = optimizer_f(learning_rate=lr)

        # q network evaluation
        q1_t = q_func.forward(obs_t_input.get(),
                              num_actions,
                              scope="q1_func",
                              reuse=True)  # reuse q1 parameters from act
        q1_func_vars = tf.compat.v1.get_collection(
            tf.compat.v1.GraphKeys.TRAINABLE_VARIABLES,
            scope=tf.compat.v1.get_variable_scope().name + "/q1_func")
        q2_t = q_func.forward(obs_t_input.get(),
                              num_actions,
                              scope="q2_func",
                              reuse=True)  # reuse q2 parameters from act
        q2_func_vars = tf.compat.v1.get_collection(
            tf.compat.v1.GraphKeys.TRAINABLE_VARIABLES,
            scope=tf.compat.v1.get_variable_scope().name + "/q2_func")

        # target q network evalution
        q1_tp1 = q_func.forward(obs_tp1_input.get(),
                                num_actions,
                                scope="target_q1_func",
                                reuse=False)
        target_q1_func_vars = tf.compat.v1.get_collection(
            tf.compat.v1.GraphKeys.TRAINABLE_VARIABLES,
            scope=tf.compat.v1.get_variable_scope().name + "/target_q1_func")
        q2_tp1 = q_func.forward(obs_tp1_input.get(),
                                num_actions,
                                scope="target_q2_func",
                                reuse=False)
        target_q2_func_vars = tf.compat.v1.get_collection(
            tf.compat.v1.GraphKeys.TRAINABLE_VARIABLES,
            scope=tf.compat.v1.get_variable_scope().name + "/target_q2_func")

        # q scores for actions which we know were selected in the given state.
        q1_t_selected = tf.reduce_sum(input_tensor=q1_t *
                                      tf.one_hot(act_t_ph, num_actions),
                                      axis=1)
        q2_t_selected = tf.reduce_sum(input_tensor=q2_t *
                                      tf.one_hot(act_t_ph, num_actions),
                                      axis=1)

        # Actions selected with current q funcs at state t+1.
        q1_tp1_using_online_net = q_func.forward(obs_tp1_input.get(),
                                                 num_actions,
                                                 scope="q1_func",
                                                 reuse=True)
        q2_tp1_using_online_net = q_func.forward(obs_tp1_input.get(),
                                                 num_actions,
                                                 scope="q2_func",
                                                 reuse=True)
        tp1_best_action_using_online_net = tf.argmax(
            input=q1_tp1_using_online_net + q2_tp1_using_online_net, axis=1)
        # Using action at t+1 find target value associated with the action
        q1_tp1_selected = tf.reduce_sum(
            input_tensor=q1_tp1 *
            tf.one_hot(tp1_best_action_using_online_net, num_actions),
            axis=1)
        q2_tp1_selected = tf.reduce_sum(
            input_tensor=q2_tp1 *
            tf.one_hot(tp1_best_action_using_online_net, num_actions),
            axis=1)
        # Min of target q values to be used bellman equation
        q_tp1_best = tf.minimum(q1_tp1_selected, q2_tp1_selected)

        # compute RHS of bellman equation
        q_tp1_selected_target = rew_t_ph + gamma * q_tp1_best

        # compute the error (potentially clipped)
        td_error1 = q1_t_selected - tf.stop_gradient(q_tp1_selected_target)
        td_error2 = q2_t_selected - tf.stop_gradient(q_tp1_selected_target)
        errors1 = U.huber_loss(td_error1)
        errors2 = U.huber_loss(td_error2)
        errors = errors1 + errors2
        weighted_error = tf.reduce_mean(input_tensor=importance_weights_ph *
                                        errors)

        #Print total number of params
        total_parameters = 0
        for variable in tf.compat.v1.trainable_variables():
            # shape is an array of tf.Dimension
            shape = variable.get_shape()
            variable_parameters = 1
            for dim in shape:
                variable_parameters *= dim.value
            # print("var params", variable_parameters)
            total_parameters += variable_parameters
        print(
            "===============================================================")
        print("Total number of trainable params:", total_parameters)
        print(
            "===============================================================")

        # Log for tensorboard
        tf.summary.scalar('q1_values', tf.math.reduce_mean(q1_t))
        tf.summary.scalar('q2_values', tf.math.reduce_mean(q2_t))
        tf.summary.scalar('td_1', tf.math.reduce_mean(td_error1))
        tf.summary.scalar('td_2', tf.math.reduce_mean(td_error2))
        tf.summary.scalar('weighted_loss', weighted_error)
        tf.summary.scalar('lr_schedule', lr)
        tf.summary.scalar('td_MSE_1',
                          tf.math.reduce_mean(tf.math.square(td_error1)))
        tf.summary.scalar('td_MSE_2',
                          tf.math.reduce_mean(tf.math.square(td_error2)))

        # combine variable scopes
        q_func_vars = q1_func_vars + q2_func_vars
        # compute optimization op (potentially with gradient clipping)
        if grad_norm_clipping is not None:
            gradients = optimizer.compute_gradients(weighted_error,
                                                    var_list=q_func_vars)
            for i, (grad, var) in enumerate(gradients):
                if grad is not None:
                    gradients[i] = (tf.clip_by_norm(grad,
                                                    grad_norm_clipping), var)
            optimize_expr = optimizer.apply_gradients(gradients)
        else:
            optimize_expr = optimizer.minimize(weighted_error,
                                               var_list=q_func_vars)

        # update_target_fn will be called every step to copy Q network to target Q network
        # target network is updated with polyak averaging
        update_target_expr1 = []
        for var, var_target in zip(
                sorted(q1_func_vars, key=lambda v: v.name),
                sorted(target_q1_func_vars, key=lambda v: v.name)):
            update_target_expr1.append(
                var_target.assign(tau * var + (1 - tau) * var_target))
        update_target_expr1 = tf.group(*update_target_expr1)

        update_target_expr2 = []
        for var, var_target in zip(
                sorted(q2_func_vars, key=lambda v: v.name),
                sorted(target_q2_func_vars, key=lambda v: v.name)):
            update_target_expr2.append(
                var_target.assign(tau * var + (1 - tau) * var_target))
        update_target_expr2 = tf.group(*update_target_expr2)

        merged_summary = tf.compat.v1.summary.merge_all(
            scope=tf.compat.v1.get_variable_scope().name)
        # Create callable functions
        train = U.function(inputs=[
            obs_t_input, act_t_ph, rew_t_ph, obs_tp1_input, done_mask_ph,
            importance_weights_ph, iteration
        ],
                           outputs=[
                               td_error1, td_error2,
                               tf.reduce_mean(input_tensor=errors),
                               merged_summary
                           ],
                           updates=[optimize_expr, lr])
        update_target = U.function(
            [], [], updates=[update_target_expr1, update_target_expr2])

        q_values = U.function(inputs=[obs_t_input], outputs=[q1_t, q2_t])

        return act_f, act_greedy, q_values, train, update_target, {
            'q_values': q_values
        }
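The target-network update built above uses Polyak averaging; a minimal standalone sketch of the same idea (function and argument names are illustrative, not from the source):

def make_soft_update(online_vars, target_vars, tau=0.05):
    """Returns an op that moves each target variable a fraction tau toward its online twin.

    tau = 1.0 reduces to a hard copy, matching the docstring above.
    """
    updates = [
        v_target.assign(tau * v + (1.0 - tau) * v_target)
        for v, v_target in zip(sorted(online_vars, key=lambda v: v.name),
                               sorted(target_vars, key=lambda v: v.name))
    ]
    return tf.group(*updates)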
Example #53
    def grad_clip_fn(self, opt, loss, tvars, **kargs):
        gpu_count = self.config.get('gpu_count', 1)
        grad_name = kargs.get('grad_name', "grad_norm")
        grad_ratio = kargs.get('grad_ratio', {})
        if self.config.get("opt_type", "pai_soar") == "pai_soar":
            loss_fn = opt.compute_loss(loss,
                                       loss_scale=self.config.get(
                                           "loss_scale", 1))
            grads_and_vars = opt.compute_gradients(
                loss_fn, colocate_gradients_with_ops=True)
        else:
            grads_and_vars = opt.compute_gradients(loss, tvars)

            valid_vars = []
            for grad, var in grads_and_vars:
                if grad is not None:
                    valid_vars.append(var)
                    print(grad, var, tf.is_nan(grad), '====nan grad====')
                else:
                    print(var.name, "=====none grad======", grad_name)

            grads = [grad for grad, _ in grads_and_vars
                     if grad is not None]  # allreduce from sum to mean
            # grads_and_vars = zip(valid_grads, valid_vars)
            grad_clip = self.config.get("grad_clip", "global_norm")
            use_norm = tf.global_norm(grads)
            tf.summary.scalar(grad_name + '/total_grad_norm', use_norm)
            for grad, var in grads_and_vars:
                if grad is not None:
                    var_grad_norm = tf.global_norm([grad])
                    tf.summary.scalar(grad_name + "/" + var.name,
                                      var_grad_norm)
                # tf.summary.histogram(var.name, var)
                # tf.summary.histogram("grad/"+var.name, grad)

            tf.logging.info(" gradient clip method {}".format(grad_clip))

            if grad_clip == "global_norm":
                clip_norm = self.config.get("clip_norm", 1.0)
                if self.config.get("strategy", "") in [
                        'MirroredStrategy', 'CollectiveAllReduceStrategy'
                ]:
                    use_norm = tf.global_norm(grads)

                    [scale_grads, _] = tf.clip_by_global_norm(
                        grads,
                        clip_norm=clip_norm,
                        use_norm=use_norm * tf.sqrt(gpu_count * 1.0))

                    tf.summary.scalar(grad_name + '/grad_scale',
                                      use_norm * tf.sqrt(gpu_count * 1.0))
                else:
                    [scale_grads,
                     _] = tf.clip_by_global_norm(grads, clip_norm=clip_norm)
            elif grad_clip == "norm":
                clip_norm = self.config.get("clip_norm", 1.0)
                scale_grads = [
                    tf.clip_by_norm(grad, clip_norm) for grad in grads
                ]
            elif grad_clip == "value":
                clip_min_value = self.config.get("clip_min_value", -1.0)
                clip_max_value = self.config.get("clip_max_value", 1.0)
                scale_grads = [
                    tf.clip_by_value(grad, clip_min_value, clip_max_value)
                    for grad in grads
                ]
            else:
                scale_grads = grads

            grads_and_vars = zip(scale_grads, valid_vars)

        return grads_and_vars
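Stripped of config lookups and summaries, the three clipping branches above reduce to the following sketch (function and argument names are illustrative):

def clip_grads(grads, mode="global_norm", clip_norm=1.0,
               clip_min_value=-1.0, clip_max_value=1.0):
    """Hedged sketch of the clipping modes dispatched by grad_clip_fn above."""
    if mode == "global_norm":
        clipped, _ = tf.clip_by_global_norm(grads, clip_norm=clip_norm)
    elif mode == "norm":
        clipped = [tf.clip_by_norm(g, clip_norm) for g in grads]
    elif mode == "value":
        clipped = [tf.clip_by_value(g, clip_min_value, clip_max_value) for g in grads]
    else:
        clipped = grads
    return clipped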
Example #54
    def build_graph(self, graph, embedding_array, Config):
        """

        :param graph:
        :param embedding_array:
        :param Config:
        :return:
        """

        with graph.as_default():
            self.embeddings = tf.Variable(embedding_array, dtype=tf.float32)

            """
            ===================================================================

            Define the computational graph with necessary variables.
            
            """

            self.train_inputs = tf.placeholder(tf.int32, shape=[constants.batch_size, constants.n_Tokens])
            self.train_labels = tf.placeholder(tf.int32, shape=[constants.batch_size, parsing_system.numTransitions()])


            train_embedding_lookup = tf.nn.embedding_lookup(self.embeddings, self.train_inputs)
            train_embed = tf.reshape(train_embedding_lookup, [constants.batch_size, -1])


            # Masking out invalid -1 transitions in train_labels
            #train_labels = tf.nn.relu(self.train_labels)

            #weights_input = tf.Variable(tf.truncated_normal(shape=[constants.hidden_size, constants.embedding_size * constants.n_Tokens], stddev=1/math.sqrt(constants.embedding_size * constants.n_Tokens))

            '''biases_input = tf.Variable(tf.zeros([constants.hidden_size,1]))

            weights_output = tf.Variable(tf.truncated_normal(shape=[parsing_system.numTransitions(), constants.hidden_size], stddev= 1/ math.sqrt((constants.hidden_size))))




            #self.predictions = self.forward_pass_parallel(train_embed, weights_words, weights_tags, weights_labels, biases_words, biases_tags, biases_labels, weights_output)'''



            '''train_labels = tf.nn.relu(self.train_labels)

            


            self.predictions = self.forward_pass(train_embed, weights_input, biases_input, weights_output)


            self.loss = tf.nn.softmax_cross_entropy_with_logits_v2(logits=self.predictions, labels=train_labels)
            thetas = tf.nn.l2_loss(train_embed) + tf.nn.l2_loss(weights_input) + tf.nn.l2_loss(biases_input) + tf.nn.l2_loss(weights_output)
            self.loss = tf.reduce_mean(self.loss + constants.lam * thetas)'''




            ######################################## Remove these comment for 2 hidden layer implementation
            '''weights_input = tf.Variable(tf.truncated_normal(shape=[constants.hidden_size, constants.embedding_size * constants.n_Tokens], stddev=1/math.sqrt(constants.embedding_size * constants.n_Tokens)))
            biases_input = tf.Variable(tf.zeros(shape = [constants.hidden_size, 1]))
            weights2 = tf.Variable(tf.truncated_normal(shape=[constants.hidden2_size, constants.hidden_size], stddev= 1/ math.sqrt((constants.hidden_size))))
            biases2 = tf.Variable(tf.zeros(shape = [constants.hidden2_size, 1]))
            weights_output = tf.Variable(tf.truncated_normal(shape=[parsing_system.numTransitions(), constants.hidden2_size], stddev=1 / math.sqrt(constants.hidden2_size)))

            self.predictions = self.forward_pass_2_hidden(train_embed, weights_input, biases_input,weights2, biases2, weights_output)

            train_labels = tf.nn.relu(self.train_labels)
            
            self.loss = tf.nn.softmax_cross_entropy_with_logits_v2(logits=self.predictions, labels=train_labels)
            thetas2 = tf.nn.l2_loss(train_embed) + tf.nn.l2_loss(weights_input) + tf.nn.l2_loss(biases_input) + tf.nn.l2_loss(weights2) + tf.nn.l2_loss(biases2) + tf.nn.l2_loss(weights_output)
            self.loss = tf.reduce_mean(self.loss + constants.lam * thetas2)'''


            ################### Alternate2 hidden layer

            '''weights_input = tf.Variable(tf.truncated_normal(shape=[constants.hidden_size, constants.embedding_size * constants.n_Tokens], stddev=0.1))
            biases_input = tf.Variable(tf.random_normal(stddev= 0.1, shape = [constants.hidden_size]))
            weights2 = tf.Variable(tf.truncated_normal(shape=[constants.hidden2_size, constants.hidden_size], stddev=0.1))
            biases2 = tf.Variable(tf.random_normal(stddev= 0.1, shape = [constants.hidden2_size]))
            weights_output = tf.Variable(tf.truncated_normal(shape=[parsing_system.numTransitions(), constants.hidden2_size], stddev=0.1))

            self.predictions = self.forward_pass_2_hidden_alt(train_embed, weights_input, biases_input,weights2, biases2, weights_output)

            print self.predictions

            #self.predictions = tf.Print(self.predictions, [self.predictions])

            train_labels = tf.nn.relu(self.train_labels)
            
            self.loss = tf.nn.softmax_cross_entropy_with_logits_v2(logits=self.predictions, labels=train_labels)
            thetas2 = tf.nn.l2_loss(train_embed) + tf.nn.l2_loss(weights_input) + tf.nn.l2_loss(biases_input) + tf.nn.l2_loss(weights2) + tf.nn.l2_loss(biases2) + tf.nn.l2_loss(weights_output)
            self.loss = tf.reduce_mean(self.loss + constants.lam * thetas2)'''


            #############################   Remove the comments for 3 hidden layer implementation   #################################

            '''weights_input = tf.Variable(tf.truncated_normal(shape=[constants.hidden_size, constants.embedding_size * constants.n_Tokens], stddev=1/math.sqrt(constants.embedding_size * constants.n_Tokens)))
            biases_input = tf.Variable(tf.zeros(shape = [constants.hidden_size, 1]))
            weights2 = tf.Variable(tf.truncated_normal(shape=[constants.hidden2_size, constants.hidden_size], stddev= 1/ math.sqrt((constants.hidden_size))))
            biases2 = tf.Variable(tf.zeros(shape = [constants.hidden2_size, 1]))
            weights3 = tf.Variable(tf.truncated_normal(shape=[constants.hidden3_size, constants.hidden2_size], stddev= 1/ math.sqrt((constants.hidden2_size))))
            biases3 = tf.Variable(tf.zeros([constants.hidden3_size, 1]))
            weights_output = tf.Variable(tf.truncated_normal(shape=[parsing_system.numTransitions(), constants.hidden3_size], stddev= 1/ math.sqrt((constants.hidden3_size))))
            
            self.predictions = self.forward_pass_3_hidden(train_embed, weights_input, biases_input,weights2, biases2, weights3, biases3, weights_output)

            train_labels = tf.nn.relu(self.train_labels)
            self.loss = tf.nn.softmax_cross_entropy_with_logits_v2(logits=self.predictions, labels=train_labels)
            thetas3 = tf.nn.l2_loss(train_embed) + tf.nn.l2_loss(weights_input) + tf.nn.l2_loss(biases_input) + tf.nn.l2_loss(weights2) + tf.nn.l2_loss(biases2) + tf.nn.l2_loss(weights3) + tf.nn.l2_loss(biases3) + tf.nn.l2_loss(weights_output)    
            self.loss = tf.reduce_mean(self.loss + constants.lam * thetas3)'''


            ##################### Use below commented code for 3 parralel layers for words, tags and labels ###############

            
            train_embed_words = tf.slice(train_embedding_lookup, [0, 0, 0], [constants.batch_size, constants.n_Tokens_word, constants.embedding_size])
            train_embed_words = tf.reshape(train_embed_words, [constants.batch_size, -1])

            train_embed_pos = tf.slice(train_embedding_lookup, [0, 18, 0], [constants.batch_size, constants.n_Tokens_pos, constants.embedding_size])
            train_embed_pos = tf.reshape(train_embed_pos, [constants.batch_size, -1])
            
            train_embed_labels = tf.slice(train_embedding_lookup, [0, 36, 0], [constants.batch_size, constants.n_Tokens_labels, constants.embedding_size])
            train_embed_labels = tf.reshape(train_embed_labels, [constants.batch_size, -1])


            weights_output_words = tf.Variable(tf.random_normal(shape=[parsing_system.numTransitions(), constants.hidden_size], stddev=1.0/math.sqrt(constants.hidden_size)))
            weights_output_pos = tf.Variable(tf.random_normal(shape=[parsing_system.numTransitions(), constants.hidden_size], stddev=1.0/math.sqrt(constants.hidden_size)))
            weights_output_labels = tf.Variable(tf.random_normal(shape=[parsing_system.numTransitions(), constants.hidden_size], stddev=1.0/math.sqrt(constants.hidden_size)))



            weights_input_words = tf.Variable(tf.truncated_normal(shape=[constants.hidden_size, constants.n_Tokens_word * constants.embedding_size], stddev=0.1))
            biases_input_words = tf.Variable(tf.zeros([constants.hidden_size, 1]))

            weights_input_pos = tf.Variable(tf.truncated_normal(shape=[constants.hidden_size, constants.n_Tokens_pos * constants.embedding_size], stddev=0.1))
            biases_input_pos = tf.Variable(tf.zeros([constants.hidden_size, 1]))

            weights_input_labels = tf.Variable(tf.truncated_normal(shape=[constants.hidden_size, constants.n_Tokens_labels * constants.embedding_size], stddev=0.1))
            biases_input_labels = tf.Variable(tf.zeros([constants.hidden_size, 1]))



            self.prediction_words = self.forward_pass(train_embed_words, weights_input_words, biases_input_words, weights_output_words)
            self.prediction_pos = self.forward_pass(train_embed_pos, weights_input_pos, biases_input_pos, weights_output_pos)
            self.prediction_labels = self.forward_pass(train_embed_labels, weights_input_labels, biases_input_labels, weights_output_labels)


            train_labels = tf.nn.relu(self.train_labels)

            loss_words = tf.nn.softmax_cross_entropy_with_logits_v2(logits=self.prediction_words, labels=train_labels)
            loss_pos = tf.nn.softmax_cross_entropy_with_logits_v2(logits=self.prediction_pos, labels=train_labels)
            loss_labels = tf.nn.softmax_cross_entropy_with_logits_v2(logits=self.prediction_labels, labels=train_labels)


            l2_input_words = constants.lam * tf.nn.l2_loss(weights_input_words)
            l2_biases_words = constants.lam * tf.nn.l2_loss(biases_input_words)

            l2_input_pos = constants.lam * tf.nn.l2_loss(weights_input_pos)
            l2_biases_pos = constants.lam * tf.nn.l2_loss(biases_input_pos)

            l2_input_labels = constants.lam * tf.nn.l2_loss(weights_input_labels)
            l2_biases_labels = constants.lam * tf.nn.l2_loss(biases_input_labels)

            l2_output_words = constants.lam * tf.nn.l2_loss(weights_output_words)
            l2_output_pos = constants.lam * tf.nn.l2_loss(weights_output_pos)
            l2_output_labels = constants.lam * tf.nn.l2_loss(weights_output_labels)

            l2_embed_words = constants.lam * tf.nn.l2_loss(train_embed_words)
            l2_embed_pos = constants.lam * tf.nn.l2_loss(train_embed_pos)
            l2_embed_labels = constants.lam * tf.nn.l2_loss(train_embed_labels)


            l2_loss = (loss_words + l2_input_words + l2_biases_words + l2_output_words + l2_embed_words) + \
                        (loss_pos + l2_input_pos + l2_biases_pos + l2_output_pos + l2_embed_pos) + \
                        (loss_labels + l2_input_labels + l2_biases_labels + l2_output_labels + l2_embed_labels)

            #------------------------------------------------------------------------------------------------------------------#
  
            # Take average loss over the entire batch
            self.loss = tf.reduce_mean(l2_loss)





            #################====================================================##########################

            ##############  gradient descent computation with gradient clipping ##############
            optimizer = tf.train.GradientDescentOptimizer(constants.learning_rate)
            grads = optimizer.compute_gradients(self.loss)
            clipped_grads = [(tf.clip_by_norm(grad, 5), var) for grad, var in grads]
            self.app = optimizer.apply_gradients(clipped_grads)


            ################### Test Predictions #######################################

            self.test_inputs = tf.placeholder(tf.int32, shape=[constants.n_Tokens])

            test_embed = tf.nn.embedding_lookup(self.embeddings, self.test_inputs)
            test_embed = tf.reshape(test_embed, [1, -1])




            #self.test_pred = self.forward_pass(test_embed, weights_input, biases_input, weights_output)


            ############ Use below commented code to run for 2 hidden layers ##########

            #self.test_pred = self.forward_pass_2_hidden(test_embed, weights_input, biases_input, weights2, biases2, weights_output)


            ############ Use below commented code for 2 hidden alternate ################

            #self.test_pred = self.forward_pass_2_hidden_alt(test_embed, weights_input, biases_input, weights2, biases2, weights_output)


            ########### Use below commented code for 3 hidden layer implementation

            #self.test_pred = self.forward_pass_3_hidden(test_embed, weights_input, biases_input, weights2, biases2, weights3, biases3, weights_output)







            # Prediction for the test data

            test_embed_words = tf.slice(test_embed, [0, 0], [constants.n_Tokens_words, test_embed.get_shape()[1]])
            test_embed_words = tf.reshape(test_embed_words, [1, -1])

            test_embed_pos = tf.slice(test_embed, [18, 0], [constants.n_Tokens_pos, test_embed.get_shape()[1]])
            test_embed_pos = tf.reshape(test_embed_pos, [1, -1])

            test_embed_labels = tf.slice(test_embed, [36, 0], [constants.n_Tokens_labels, test_embed.get_shape()[1]])
            test_embed_labels = tf.reshape(test_embed_labels, [1, -1])


            test_pred_words = self.forward_pass(test_embed_words, weights_input_words, biases_input_words, weights_output_words)
            test_pred_pos = self.forward_pass(test_embed_pos, weights_input_pos, biases_input_pos, weights_output_pos)
            test_pred_labels = self.forward_pass(test_embed_labels, weights_input_labels, biases_input_labels, weights_output_labels)

            self.test_pred = (test_pred_words + test_pred_pos + test_pred_labels) / 3





            # initializer
            self.init = tf.global_variables_initializer()
Example #55
    def _model_build(self):
        with tf.variable_scope(self.scope):
            # Inputs are 4 image frames with shape 84x84
            self.X = tf.placeholder(shape=[None, 84, 84, 4],
                                    dtype=tf.uint8,
                                    name="X")
            self.y = tf.placeholder(shape=[None], dtype=tf.float32, name="y")
            self.isTraining = tf.placeholder(dtype=tf.bool, name="isTraining")
            self.actions = tf.placeholder(shape=[None],
                                          dtype=tf.int32,
                                          name="actions")

            X = tf.to_float(self.X) / 255.0
            batch_size = tf.shape(self.X)[0]

            # CNN with Batchnorm
            conv1 = tf.contrib.layers.conv2d(X, 32, 8, 4, activation_fn=None)
            conv1_bn = tf.contrib.layers.batch_norm(
                conv1, center=True, scale=True, is_training=self.isTraining)
            h1 = tf.nn.relu(conv1_bn, 'relu')
            conv2 = tf.contrib.layers.conv2d(h1, 64, 4, 2, activation_fn=None)
            conv2_bn = tf.contrib.layers.batch_norm(
                conv2, center=True, scale=True, is_training=self.isTraining)
            h2 = tf.nn.relu(conv2_bn, 'relu')
            conv3 = tf.contrib.layers.conv2d(h2, 64, 3, 1, activation_fn=None)
            conv3_bn = tf.contrib.layers.batch_norm(
                conv3, center=True, scale=True, is_training=self.isTraining)
            h3 = tf.nn.relu(conv3_bn, 'relu')

            # Fully Connected Layers
            flattened = tf.contrib.layers.flatten(h3)
            fc1 = tf.contrib.layers.fully_connected(flattened,
                                                    512,
                                                    activation_fn=None)
            fc1_bn = tf.contrib.layers.batch_norm(fc1,
                                                  center=True,
                                                  scale=True,
                                                  is_training=self.isTraining)
            fc_act = tf.nn.relu(fc1_bn, 'relu')
            self.predictions = tf.contrib.layers.fully_connected(
                fc_act, N_ACTION)

            # Original Convolution layers
            # conv1 = tf.contrib.layers.conv2d(X, 32, 8, 4, activation_fn=tf.nn.relu)
            # conv2 = tf.contrib.layers.conv2d(conv1, 64, 4, 2, activation_fn=tf.nn.relu)
            # conv3 = tf.contrib.layers.conv2d(conv2, 64, 3, 1, activation_fn=tf.nn.relu)

            # Original Fully connected layers
            # flattened = tf.contrib.layers.flatten(conv3)
            # fc1 = tf.contrib.layers.fully_connected(flattened, 512)
            # self.predictions = tf.contrib.layers.fully_connected(fc1, N_ACTION)

            # Q value for action-state pairs
            # [0, 1, 2, 3, 0, 1, 2, 3, 0, 1, **2**, 3]
            # 4 batches --> tf.range = [0,1,2,3]
            # After reshape to 1 dimension array --> if want to find Q_value for a action 1
            # --> index of selection action is 4 * (n_batch of action - 1) + actions
            # Example: action 2 @ batch 3 ---> index = (3-1)*4 + 2 = 10 (remember index counted from 0)

            # Get the predictions for the chosen actions only
            gather_indices = tf.range(batch_size) * tf.shape(
                self.predictions)[1] + self.actions
            self.action_predictions = tf.gather(
                tf.reshape(self.predictions, [-1]), gather_indices)

            # Calculate the loss
            self.losses = tf.squared_difference(self.y,
                                                self.action_predictions)
            self.loss = tf.reduce_mean(self.losses)

            self.optimizer = tf.train.AdamOptimizer(LEARNING_RATE)
            gradients, variables = zip(
                *self.optimizer.compute_gradients(self.loss))
            gradients = [
                None if gradient is None else tf.clip_by_norm(
                    gradient, GRADIENT_CLIPPING_NORM) for gradient in gradients
            ]
            self.train_op = self.optimizer.apply_gradients(
                zip(gradients, variables))
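The flatten-and-gather indexing described in the comments above can be isolated into a small helper; a sketch (names are illustrative, not from the source):

def select_action_values(predictions, actions):
    """Returns Q(s, a) for the chosen action of each batch element.

    predictions: (batch_size, n_actions) Q-values; actions: (batch_size,) int32 indices.
    """
    batch_size = tf.shape(predictions)[0]
    n_actions = tf.shape(predictions)[1]
    gather_indices = tf.range(batch_size) * n_actions + actions
    return tf.gather(tf.reshape(predictions, [-1]), gather_indices)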
Example #56
    def create_variables(self):

        with tf.name_scope("model_inputs"):
            # raw state representation
            self.states = tf.placeholder(tf.float32, (None, self.state_dim),
                                         name="states")

        # rollout action based on current policy
        with tf.name_scope("predict_actions"):
            # initialize actor-critic network
            with tf.variable_scope("actor_network"):
                self.policy_outputs = self.actor_network(self.states)
            with tf.variable_scope("critic_network"):
                self.value_outputs = self.critic_network(self.states)

            # predict actions from policy network
            self.action_scores = tf.identity(self.policy_outputs,
                                             name="action_scores")
            # Note 1: tf.multinomial is not good enough to use yet
            # so we don't use self.predicted_actions for now
            self.predicted_actions = tf.multinomial(self.action_scores, 1)

        # get variable list
        actor_network_variables = tf.get_collection(
            tf.GraphKeys.TRAINABLE_VARIABLES, scope="actor_network")
        critic_network_variables = tf.get_collection(
            tf.GraphKeys.TRAINABLE_VARIABLES, scope="critic_network")

        # compute loss and gradients
        with tf.name_scope("compute_pg_gradients"):
            # gradients for selecting action from policy network
            self.taken_actions = tf.placeholder(tf.int32, (None, ),
                                                name="taken_actions")
            self.discounted_rewards = tf.placeholder(tf.float32, (None, ),
                                                     name="discounted_rewards")

            with tf.variable_scope("actor_network", reuse=True):
                self.logprobs = self.actor_network(self.states)

            with tf.variable_scope("critic_network", reuse=True):
                self.estimated_values = self.critic_network(self.states)

            # compute policy loss and regularization loss
            self.cross_entropy_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
                self.logprobs, self.taken_actions)
            self.pg_loss = tf.reduce_mean(self.cross_entropy_loss)
            self.actor_reg_loss = tf.reduce_sum(
                [tf.reduce_sum(tf.square(x)) for x in actor_network_variables])
            self.actor_loss = self.pg_loss + self.reg_param * self.actor_reg_loss

            # compute actor gradients
            self.actor_gradients = self.optimizer.compute_gradients(
                self.actor_loss, actor_network_variables)
            # compute advantages A(s) = R - V(s)
            self.advantages = tf.reduce_sum(self.discounted_rewards -
                                            self.estimated_values)
            # compute policy gradients
            for i, (grad, var) in enumerate(self.actor_gradients):
                if grad is not None:
                    self.actor_gradients[i] = (grad * self.advantages, var)

            # compute critic gradients
            self.mean_square_loss = tf.reduce_mean(
                tf.square(self.discounted_rewards - self.estimated_values))
            self.critic_reg_loss = tf.reduce_sum([
                tf.reduce_sum(tf.square(x)) for x in critic_network_variables
            ])
            self.critic_loss = self.mean_square_loss + self.reg_param * self.critic_reg_loss
            self.critic_gradients = self.optimizer.compute_gradients(
                self.critic_loss, critic_network_variables)

            # collect all gradients
            self.gradients = self.actor_gradients + self.critic_gradients

            # clip gradients
            for i, (grad, var) in enumerate(self.gradients):
                # clip gradients by norm
                if grad is not None:
                    self.gradients[i] = (tf.clip_by_norm(
                        grad, self.max_gradient), var)

            # summarize gradients
            for grad, var in self.gradients:
                tf.histogram_summary(var.name, var)
                if grad is not None:
                    tf.histogram_summary(var.name + '/gradients', grad)

            # emit summaries
            tf.histogram_summary("estimated_values", self.estimated_values)
            tf.scalar_summary("actor_loss", self.actor_loss)
            tf.scalar_summary("critic_loss", self.critic_loss)
            tf.scalar_summary("reg_loss",
                              self.actor_reg_loss + self.critic_reg_loss)

        # training update
        with tf.name_scope("train_actor_critic"):
            # apply gradients to update actor network
            self.train_op = self.optimizer.apply_gradients(self.gradients)

        self.summarize = tf.merge_all_summaries()
        self.no_op = tf.no_op()
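The discounted_rewards placeholder above is typically fed with returns computed outside the graph; a hedged NumPy sketch of that preprocessing (the discount factor is illustrative):

import numpy as np

def discount_rewards(rewards, gamma=0.99):
    """Computes G_t = r_t + gamma * G_{t+1} by iterating backwards over one episode."""
    returns = np.zeros(len(rewards), dtype=np.float32)
    running = 0.0
    for t in reversed(range(len(rewards))):
        running = rewards[t] + gamma * running
        returns[t] = running
    return returns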
Example #57
 def clip_grad_local(grad):
     return tf.clip_by_norm(grad, args.clip_value)
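A hedged usage sketch of clip_grad_local applied per gradient; `optimizer` and `loss` are placeholders, not from the source:

# Hypothetical usage: clip every non-None gradient locally before applying.
grads_and_vars = optimizer.compute_gradients(loss)
clipped = [(clip_grad_local(g), v) for g, v in grads_and_vars if g is not None]
train_op = optimizer.apply_gradients(clipped)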
Example #58
 def __init__(self,  size_obs, size_act, net_struct = [100, 100, 100, 100], name='dbg'):
     self.tensorboardpath = 'tensorboards/' + name
     self.train_writer = tf.summary.FileWriter(self.tensorboardpath)
     self.ModelPath = 'Models/Imitation' + name
     
     self.mse_train = []
     self.mse_val = []
     self.last_epoch = 0
     size_inpt = 200
     self.obs = tf.placeholder(tf.float32, shape=(None, size_obs))
     self.ret = tf.placeholder(tf.float32, shape=(None))
     act_trn = self.obs
     act_tst = self.obs
     prev_layer_size = size_obs
     #Hidden layers
     self.l2_reg = 1e-8
     self.Q_lr = tf.placeholder(tf.float32, shape=(None))
     self.lr = tf.placeholder(tf.float32, shape=(None))
     if 1:
         for idx, l in enumerate(net_struct):
             act_trn, act_tst = ops.cascade_bn_relu_trn_tst(
                     act_trn, prev_layer_size, l, name='layer' + str(idx), input_tst = act_tst)
             prev_layer_size += l
             
         w = tf.Variable(tf.random_uniform([prev_layer_size, size_act],minval = -1., maxval = 1.), name='net_output_w') * 1e-3
         b = tf.Variable(tf.random_uniform([size_act],minval = -1., maxval = 1.), name='net_output_bias') * 1e-3
     else:
         for idx, l in enumerate(net_struct):
             act_trn = ops.linear(act_trn, l, 'layer' + str(idx))
         w = tf.Variable(tf.random_uniform([l, size_act],minval = -1., maxval = 1.), name='net_output_w') * 1e-2
         b = tf.Variable(tf.random_uniform([size_act],minval = -1., maxval = 1.), name='net_output_bias') * 1e-2
     self.yhat = tf.reshape(tf.matmul(act_trn, w) + b, [-1, size_act])
     self.yhat_tst = tf.reshape(tf.matmul(act_tst, w) + b, [-1, size_act])
     
     self.obs_act = tf.concat((self.obs, self.yhat),1)
     self.Q = Q(size_obs + size_act, tf.stop_gradient(self.obs_act))
             
     self.act = tf.placeholder(tf.float32, shape=(None))
     
     self.l2_loss = tf.reduce_mean(tf.square(self.yhat - self.act))
     self.adv_loss = tf.reduce_mean(tf.square(self.yhat_tst - self.act))
     #-1*tf.gather_nd(output_tst, self.y_raw, axis=1)output_tst[list(np.arange(bs)),self.y_raw]
     
     self.advers = tf.gradients(self.l2_loss, self.obs)
     
     t_vars = tf.trainable_variables()
     net_vars = [var for var in t_vars if 'net_' in var.name]
     self.reg_loss = tf.reduce_sum([tf.reduce_sum(tf.square(var)) for var in net_vars])*self.l2_reg
     
     
     
     optimizer = tf.train.AdamOptimizer(learning_rate=self.lr)
     gvs = optimizer.compute_gradients(self.l2_loss + self.reg_loss - self.Q.yhat * self.Q_lr + self.Q.l2_loss)
     self.grad_norm = tf.reduce_mean([tf.reduce_mean(grad) for grad, var in gvs if grad is not None])
     clip_norm = 100
     clip_single = 1
     capped_gvs = [(tf.clip_by_value(grad, -1*clip_single,clip_single), var) for grad, var in gvs if grad is not None]
     capped_gvs = [(tf.clip_by_norm(grad, clip_norm), var) for grad, var in capped_gvs if grad is not None]
     self.optimizer = optimizer.apply_gradients(capped_gvs)
     
     #self.optimizer = tf.train.AdamOptimizer(self.lr).minimize(self.l2_loss)
     
     self.cur_Q_lr = 0
     
     self.session = tf.Session()
     self.session.run(tf.global_variables_initializer())
     self.Saver = tf.train.Saver()
Example #59
    def __init__(self,
                 batch_size,
                 vocab_size,
                 sentence_size,
                 memory_size,
                 embedding_size,
                 hops=3,
                 max_grad_norm=40.0,
                 nonlin=None,
                 initializer=tf.random_normal_initializer(stddev=0.1),
                 encoding=pos_enc,
                 session=tf.Session(),
                 name='MemN2N'):

        self._batch_size = batch_size
        self._vocab_size = vocab_size
        self._sentence_size = sentence_size
        self._memory_size = memory_size
        self._embedding_size = embedding_size
        self._hops = hops
        self._max_grad_norm = max_grad_norm
        self._nonlin = nonlin
        self._init = initializer
        self._name = name

        self.build_inputs()
        self.build_variables()

        self._opt = tf.train.GradientDescentOptimizer(learning_rate=self._lr)

        self._encoding = tf.constant(encoding(self._sentence_size,
                                              self._embedding_size),
                                     name="encoding")

        logits = self.calc_output(self._stories,
                                  self._queries)  # (batch_size, vocab_size)
        self._logits = logits
        cross_entropy = tf.nn.softmax_cross_entropy_with_logits(
            logits=logits,
            labels=tf.cast(self._answers, tf.float32),
            name="cross_entropy")
        cross_entropy_sum = tf.reduce_sum(cross_entropy,
                                          name="cross_entropy_sum")

        loss_op = cross_entropy_sum

        grads_and_vars = self._opt.compute_gradients(loss_op)
        grads_and_vars = [(tf.clip_by_norm(g, self._max_grad_norm), v)
                          for g, v in grads_and_vars]
        grads_and_vars = [(add_noise(g), v) for g, v in grads_and_vars]
        nil_grads_and_vars = []
        for g, v in grads_and_vars:
            if v.name in self._nil_vars:
                nil_grads_and_vars.append((zero_slot(g), v))
            else:
                nil_grads_and_vars.append((g, v))
        train_op = self._opt.apply_gradients(nil_grads_and_vars,
                                             name="train_op")

        predict_op = tf.argmax(logits, 1, name="predict_op")

        self.loss_op = loss_op
        self.predict_op = predict_op
        self.train_op = train_op

        init_op = tf.global_variables_initializer()
        self._sess = session
        self._sess.run(init_op)
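The pos_enc helper passed as the default encoding above is not shown; a sketch following the MemN2N position-encoding formula l_kj = (1 - j/J) - (k/d)(1 - 2j/J) (the actual helper may use a different variant):

import numpy as np

def pos_enc(sentence_size, embedding_size):
    """Returns a (sentence_size, embedding_size) position-encoding matrix."""
    J, d = sentence_size, embedding_size
    encoding = np.ones((d, J), dtype=np.float32)
    for k in range(1, d + 1):
        for j in range(1, J + 1):
            encoding[k - 1, j - 1] = (1.0 - j / J) - (k / d) * (1.0 - 2.0 * j / J)
    return np.transpose(encoding)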
Example #60
 def max_norm(weights):
     clipped = tf.clip_by_norm(weights, clip_norm=threshold, axes=axes)
     clip_weights = tf.assign(weights, clipped, name=name)
     tf.add_to_collection(collection, clip_weights)
     return None
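A hedged usage sketch of such a max-norm constraint: the clip-and-assign op is registered once per weight and re-run after each optimizer step. The free variables threshold, axes, name, and collection are assumed to be defined in the enclosing scope, as the function above expects; all names below are illustrative.

# Hypothetical usage (TF 1.x):
threshold, axes, name, collection = 1.0, [0], "w_max_norm", "max_norm_ops"
w = tf.get_variable("w", shape=[256, 128])
max_norm(w)                               # registers the assign op in the collection
clip_ops = tf.get_collection(collection)
# After each training step: sess.run(train_op); sess.run(clip_ops)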