Example #1
 def _get_optimizer(self, name):
     from tensorpack.tfutils import optimizer
     from tensorpack.tfutils.gradproc import SummaryGradient, GlobalNormClip, MapGradient
     init_lr = INIT_LEARNING_RATE_A if name == 'actor' else INIT_LEARNING_RATE_C
     import tensorpack.tfutils.symbolic_functions as symbf
     lr = symbf.get_scalar_var('learning_rate/' + name,
                               init_lr,
                               summary=True)
     opt = tf.train.AdamOptimizer(lr)
     logger.info("create opt {}".format(name))
     if name == 'critic':
         gradprocs = [
             MapGradient(lambda grad: tf.clip_by_average_norm(grad, 0.05),
                         regex='^critic/.*')
         ]
     elif name == 'actor':
         gradprocs = [
             MapGradient(lambda grad: tf.clip_by_average_norm(grad, 0.1),
                         regex='^actor/.*')
         ]
     else:
         raise ValueError('unknown optimizer name: {}'.format(name))
     gradprocs.append(SummaryGradient())
     opt = optimizer.apply_grad_processors(opt, gradprocs)
     return opt
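
The regex arguments above restrict each clip to gradients of the matching scope. As a point of comparison, a minimal sketch of the same per-scope clipping without tensorpack, assuming an existing optimizer opt and scalar loss (both assumptions, not part of the quoted code):

# Minimal sketch of per-scope average-norm clipping using the raw TF1 API.
# `opt` and `loss` are assumed; the thresholds mirror the example above.
grads_and_vars = opt.compute_gradients(loss)
clipped = []
for g, v in grads_and_vars:
    if g is None:
        continue
    if v.name.startswith('actor/'):
        g = tf.clip_by_average_norm(g, 0.1)
    elif v.name.startswith('critic/'):
        g = tf.clip_by_average_norm(g, 0.05)
    clipped.append((g, v))
train_op = opt.apply_gradients(clipped)
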
Example #2
 def clip_vars(self, params):
     # tf.clip_by_average_norm returns a new tensor; rebinding the loop
     # variable would silently discard it, so collect the clipped tensors.
     self.weights = [tf.clip_by_average_norm(W, params['REG_STRENGTH'])
                     for W in self.weights]
     self.biases = [tf.clip_by_average_norm(b, params['REG_STRENGTH'])
                    for b in self.biases]
     self.W_fc = tf.clip_by_average_norm(self.W_fc, params['REG_STRENGTH'])
     self.b_fc = tf.clip_by_average_norm(self.b_fc, params['REG_STRENGTH'])
Example #3
 def _get_opt(name, init_lr):
     lr = symbf.get_scalar_var('learning_rate/'+name, init_lr, summary=True)
     opt = tf.train.AdamOptimizer(lr)
     logger.info("create opt {}".format(name))
     gradprocs = [
         # MapGradient(lambda grad: tf.Print(grad, [grad], 'grad {}='.format(grad.op.name), summarize=4)),
         MapGradient(lambda grad: tf.clip_by_average_norm(grad, 0.1), regex='^actor/.*'),
         MapGradient(lambda grad: tf.clip_by_average_norm(grad, 0.05), regex='^critic/.*'),
         # GlobalNormClip(40.),
         SummaryGradient(),
     ]
     opt = optimizer.apply_grad_processors(opt, gradprocs)
     return opt
Example #4
    def _optimize(self, loss):
        gradients = tf.gradients(loss, self.train_vars)
        # Keep only the (gradient, variable) pairs whose gradient exists.
        gradients, use_train_vars = zip(
            *[(g, v) for g, v in zip(gradients, self.train_vars) if g is not None])
        clipped_gs = [tf.clip_by_average_norm(g, self.grad_clip) for g in gradients]
        train_op = self.optimizer.apply_gradients(zip(clipped_gs, use_train_vars))

        # Same pass over the variable set without the embeddings; the pairs
        # must be zipped against train_vars_no_embedding to stay aligned.
        gradients = tf.gradients(loss, self.train_vars_no_embedding)
        gradients, use_train_vars_no_embedding = zip(
            *[(g, v) for g, v in zip(gradients, self.train_vars_no_embedding)
              if g is not None])
        clipped_gs = [tf.clip_by_average_norm(g, self.grad_clip) for g in gradients]
        train_op_no_embedding = self.optimizer.apply_gradients(
            zip(clipped_gs, use_train_vars_no_embedding))

        return train_op, train_op_no_embedding
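
The zip/filter idiom for dropping None gradients recurs throughout these examples; a hedged standalone equivalent (drop_none_gradients is a hypothetical helper, not part of the quoted project):

def drop_none_gradients(grads, variables):
    # Keep only the (gradient, variable) pairs whose gradient exists.
    pairs = [(g, v) for g, v in zip(grads, variables) if g is not None]
    grads, variables = zip(*pairs)
    return list(grads), list(variables)
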
Example #5
def regularize(output, weights, W_fc, biases, b_fc, params, sess):
    with sess.as_default():
        # if j == 0:
        #     l2_loss = tf.div(tf.sqrt(tf.nn.l2_loss(weights[0])), tf.convert_to_tensor(2.0)).eval()
        #     output.write('l2 loss is %g\n' % l2_loss)
        check_l2 = tf.reduce_sum(weights[0]).eval()
        # tf.clip_by_average_norm returns new tensors; collect them instead of
        # rebinding the loop variable, which would discard the clipped result.
        weights = [tf.clip_by_average_norm(W, params['L2_NORM_CONSTRAINT'])
                   for W in weights]
        biases = [tf.clip_by_average_norm(b, params['L2_NORM_CONSTRAINT'])
                  for b in biases]
        W_fc = tf.clip_by_average_norm(W_fc, params['L2_NORM_CONSTRAINT'])
        b_fc = tf.clip_by_average_norm(b_fc, params['L2_NORM_CONSTRAINT'])
        if float(check_l2) > float(tf.reduce_sum(weights[0]).eval()):
            output.write('weights clipped\n')
    return weights, W_fc, biases, b_fc
Example #6
    def setup_gradients(self, prefix, opt, cost):
        grads = opt.compute_gradients(cost)
        ret_grads = []
        ret_names = []
        ret_apply = []

        for e in grads:
            grad, var = e

            if grad is None or var is None:
                continue

            #print "var: %s, gradient: %s" % (var, grad)
            if self.scope != get_scope_name(var.name):
                continue

            pname = get_param_name(var.name)
            gname = '%s/gradient_%s' % (prefix, pname)
            print "gradient %s -> %s" % (var, gname)

            # get all gradients
            ret_grads.append(grad)
            ret_names.append(gname)

            pl = tf.placeholder(tf.float32, shape=var.get_shape(), name=gname)
            clip = tf.clip_by_average_norm(pl, 1)
            ret_apply.append((clip, var))

            ag = tf.summary.histogram(
                '%s/%s/apply_%s' % (self.scope, prefix, gname), clip)
            self.summary_apply_gradients.append(ag)

        return ret_grads, ret_names, ret_apply
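
setup_gradients returns gradient tensors to fetch, placeholder names, and (clipped placeholder, variable) pairs to apply, which fits an asynchronous scheme where gradients are computed in one place and applied in another. A rough usage sketch under that reading; sess, net, opt, cost, and grad_values are all assumptions, and feeding by placeholder name presumes the names resolve exactly as created:

# Hypothetical wiring: fetch gradients elsewhere, then feed them back in.
grads, names, apply_pairs = net.setup_gradients('worker', opt, cost)
apply_op = opt.apply_gradients(apply_pairs)
feed = {'%s:0' % name: value for name, value in zip(names, grad_values)}
sess.run(apply_op, feed_dict=feed)
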
Example #7
 def __init__(
         self,
         input_size=INPUT_SIZE,
         hidden_size=800,
         rating_scale=10,  #   800, tanh = 0.042: GradientDescentOptimizer
         optimizer=tf.train.GradientDescentOptimizer(learning_rate=0.0001),
         grad_norm=1,
         activation=tf.nn.sigmoid):
     self.inputs = tf.placeholder(tf.float32, [None, input_size])
     self.hidden1 = tf.layers.dense(self.inputs,
                                    hidden_size,
                                    activation=activation)
     #self.hidden2 = tf.layers.dense(self.hidden1, hidden_size, activation=activation)
     # self.hidden3 = tf.layers.dense(self.hidden2, hidden_size, activation=activation)
     # self.hidden4 = tf.layers.dense(self.hidden3, hidden_size, activation=activation)
     self.prediction_raw = tf.layers.dense(self.hidden1, 1)
     self.prediction = rating_scale * tf.nn.sigmoid(self.prediction_raw)
     self.prediction = tf.squeeze(self.prediction, axis=1)
     self.actual_rating = tf.placeholder(tf.float32, [
         None,
     ])
     self.loss = tf.losses.absolute_difference(labels=self.actual_rating,
                                               predictions=self.prediction)
     model_variables = tf.trainable_variables()
     gradients = optimizer.compute_gradients(self.loss, model_variables)
     clipped_gradients = [(tf.clip_by_average_norm(gradient,
                                                   grad_norm), variable)
                          for gradient, variable in gradients]
      # Apply the clipped gradients; using minimize() here would skip the
      # clipping computed above.
      self.train = optimizer.apply_gradients(clipped_gradients)
Example #8
 def optimizer(self):
     lr = tf.get_variable('learning_rate', initializer=1e-4, trainable=False)
     opt = tf.train.AdamOptimizer(lr)
      gradprocs = [MapGradient(lambda grad: tf.clip_by_average_norm(grad, 0.3))]
      # optionally also: SummaryGradient()
     opt = optimizer.apply_grad_processors(opt, gradprocs)
     return opt
Example #9
    def _create_train_op(self):
        self.opt = tf.train.RMSPropOptimizer(
            learning_rate=self.var_learning_rate,
            decay=Config.RMSPROP_DECAY,
            momentum=Config.RMSPROP_MOMENTUM,
            epsilon=Config.RMSPROP_EPSILON)
        
        
        self.log_likelihood_cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=self.logits_p, labels=self.action_index))
        train_op = self.opt.minimize(self.log_likelihood_cost, global_step=self.global_step)
        train_ops = [train_op] + self.extra_train_ops
        self.train_op = tf.group(*train_ops)

        # for the case of reinforcement learning
        self.selected_action_prob = tf.reduce_sum(self.softmax_p * self.action_index, axis=1)
        self.rl_cost = - tf.log(tf.maximum(self.selected_action_prob, self.log_epsilon)) * self.y_r    
        self.rl_cost = tf.reduce_sum(self.rl_cost, axis=0)

        self.opt_grad = self.opt.compute_gradients(self.rl_cost)
        self.opt_grad_clipped = [(tf.clip_by_average_norm(g, Config.GRAD_CLIP_NORM),v) for g,v in self.opt_grad]
        train_rl_op = self.opt.apply_gradients(self.opt_grad_clipped)

        #train_rl_op = self.opt.minimize(self.rl_cost, global_step=self.global_step)
        train_rl_ops = [train_rl_op] + self.extra_train_ops
        self.train_rl_op = tf.group(*train_rl_ops)
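
The rl_cost above is a REINFORCE-style objective: the negative log-probability of the taken action, floored at log_epsilon, weighted by the return. As a sanity reference, a hedged NumPy restatement of the same per-batch computation (rl_cost_np and the default epsilon are illustrative only):

import numpy as np

def rl_cost_np(softmax_p, action_onehot, y_r, log_epsilon=1e-6):
    # softmax_p: (B, A) action probabilities; action_onehot: (B, A); y_r: (B,)
    selected = np.sum(softmax_p * action_onehot, axis=1)
    per_sample = -np.log(np.maximum(selected, log_epsilon)) * y_r
    return per_sample.sum()
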
Example #10
    def average_gradients(self, tower_grads):
        average_grads = []
        for grad_and_vars in zip(*tower_grads):
            # Note that each grad_and_vars looks like the following:
            #   ((grad0_gpu0, var0_gpu0), ... , (grad0_gpuN, var0_gpuN))
            grads = []
            # Average over the 'tower' dimension.

            for g, _ in grad_and_vars:
                expanded_g = tf.expand_dims(g, 0)
                grads.append(expanded_g)
            grad = tf.concat(grads, axis=0)
            grad = tf.reduce_mean(grad, 0)

            # Keep in mind that the Variables are redundant because they are shared
            # across towers. So .. we will just return the first tower's pointer to
            # the Variable.
            v = grad_and_vars[0][1]
            grad_and_var = (grad, v)
            average_grads.append(grad_and_var)
        # clip
        if self.cfg.clip_gradient:
            gradients, variables = zip(*average_grads)
            gradients = [
                None if gradient is None else tf.clip_by_average_norm(
                    gradient, self.cfg.clip_gradient_value)
                for gradient in gradients
            ]
            average_grads = list(zip(gradients, variables))  # zip is lazy in Python 3
        return average_grads
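
For context, a hedged sketch of how such an average_gradients method is typically driven from a multi-tower loop; num_gpus and build_tower_loss are assumptions, not part of the quoted code:

# Hypothetical multi-tower usage: one gradient list per GPU, then average.
tower_grads = []
for i in range(num_gpus):
    with tf.device('/gpu:%d' % i):
        tower_loss = build_tower_loss(i)  # hypothetical per-tower loss builder
        tower_grads.append(opt.compute_gradients(tower_loss))
train_op = opt.apply_gradients(model.average_gradients(tower_grads))
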
Example #12
 def optimizer(self):
     opt = tf.train.AdamOptimizer(self.cfg.learning_rate)
     return optimizer.apply_grad_processors(opt, [
         gradproc.MapGradient(
             lambda grad: tf.clip_by_average_norm(grad, 0.3)),
         gradproc.SummaryGradient()
     ])
Example #13
 def clip_func(grads):
     clipped_grads = []
     for g, v in grads:
         if g is None:
             # Choosing not to add gradients to list if they're None. Both adding/not adding are valid choices.
             # clipped_grads.append((None, v))
             continue
         if not v.trainable:
             continue
         if clip_type in ['none', 'None']:
             pass
         elif clip_type == 'value':
             g = tf.clip_by_value(g, clip_bounds[0], clip_bounds[1])
         elif clip_type == 'norm':
             g = tf.clip_by_norm(g, clip_bounds)
         elif clip_type == 'global_norm':
             # clip_by_global_norm expects a list and returns (clipped_list, norm);
             # applied to a single gradient it reduces to clip_by_norm.
             g = tf.clip_by_global_norm([g], clip_bounds)[0][0]
         elif clip_type == 'average_norm':
             g = tf.clip_by_average_norm(g, clip_bounds)
         else:
             raise ValueError(
                 "Unrecognized gradient clipping method: {}.".format(
                     clip_type))
         clipped_grads.append((g, v))
     return clipped_grads
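
clip_func consumes the (gradient, variable) list produced by compute_gradients; a minimal hedged wiring sketch, assuming optimizer and loss exist:

# Hypothetical wiring of clip_func into a training step.
grads_and_vars = optimizer.compute_gradients(loss)
train_op = optimizer.apply_gradients(clip_func(grads_and_vars))
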
Example #14
    def __init__(self, nA,
                 learning_rate,decay,grad_clip,entropy_beta,
                 state_shape=[84,84,4],
                 master=None, device_name='/gpu:0', scope_name='master'):
        with tf.device(device_name) :
            self.state = tf.placeholder(tf.float32,[None]+state_shape)
            block, self.scope  = ActorCritic._build_shared_block(self.state,scope_name)
            self.policy, self.log_softmax_policy = ActorCritic._build_policy(block,nA,scope_name)
            self.value = ActorCritic._build_value(block,scope_name)

            self.train_vars = sorted(tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, self.scope.name), key=lambda v:v.name)
            if( master is not None ) :
                self.sync_op= self._sync_op(master)
                self.action = tf.placeholder(tf.int32,[None,])
                self.target_value = tf.placeholder(tf.float32,[None,])

                advantage = self.target_value - self.value
                entropy = tf.reduce_sum(-1. * self.policy * self.log_softmax_policy,axis=1)
                log_p_s_a = tf.reduce_sum(self.log_softmax_policy * tf.one_hot(self.action,nA),axis=1)

                self.policy_loss = tf.reduce_mean(tf.stop_gradient(advantage)*log_p_s_a)
                self.entropy_loss = tf.reduce_mean(entropy)
                self.value_loss = tf.reduce_mean(advantage**2)

                loss = -self.policy_loss - entropy_beta* self.entropy_loss + self.value_loss
                self.gradients = tf.gradients(loss,self.train_vars)
                clipped_gs = [tf.clip_by_average_norm(g,grad_clip) for g in self.gradients]
                self.train_op = master.optimizer.apply_gradients(zip(clipped_gs,master.train_vars))
            else :
                #self.optimizer = tf.train.AdamOptimizer(learning_rate,beta1=BETA)
                self.optimizer = tf.train.RMSPropOptimizer(learning_rate,decay=decay,use_locking=True)
Example #15
    def _create_graph(self):
        self.x = tf.placeholder(
            tf.float32, [None, self.img_height, self.img_width, self.img_channels], name='X')
        self.y = tf.placeholder(tf.float32, [None, self.num_actions], name='Y')

        self.var_beta = tf.placeholder(tf.float32, name='beta', shape=[])
        self.var_learning_rate = tf.placeholder(tf.float32, name='lr', shape=[])

        self.global_step = tf.Variable(0, trainable=False, name='step')

        # As implemented in A3C paper
        self.n1 = self.conv2d_layer(self.x, 8, 32, 'conv11', strides=[1, 4, 4, 1])
        self.n2 = self.conv2d_layer(self.n1, 4, 64, 'conv12', strides=[1, 2, 2, 1])
        self.n3 = self.conv2d_layer(self.n2, 3, 64, 'conv13', strides=[1, 1, 1, 1])

        self.action_index = tf.placeholder(tf.float32, [None, self.num_actions])
        _input = self.n3

        flatten_input_shape = _input.get_shape()
        nb_elements = flatten_input_shape[1] * flatten_input_shape[2] * flatten_input_shape[3]

        self.flat = tf.reshape(_input, shape=[-1, int(nb_elements)])
        self.d1 = self.dense_layer(self.flat, 512, 'dense1', func=tf.nn.relu)
        self.d2 = self.dense_layer(self.d1, self.num_actions, 'logits_p', func=None)
        #self.logits_v = tf.squeeze(self.dense_layer(self.d1, 1, 'logits_v', func=None), axis=[1])
        #self.cost_v = 0.5 * tf.reduce_sum(tf.square(self.y_r - self.logits_v), axis=0)
        #self.logits_p = self.dense_layer(self.d1, self.num_actions, 'logits_p', func=None)
        '''
        if Config.USE_LOG_SOFTMAX:
            self.softmax_p = tf.nn.softmax(self.logits_p)
            self.log_softmax_p = tf.nn.log_softmax(self.logits_p)
            self.log_selected_action_prob = tf.reduce_sum(self.log_softmax_p * self.action_index, axis=1)

            self.cost_p_1 = self.log_selected_action_prob * (self.y_r - tf.stop_gradient(self.logits_v))
            self.cost_p_2 = -1 * self.var_beta * \
                        tf.reduce_sum(self.log_softmax_p * self.softmax_p, axis=1)
        else:
            self.softmax_p = (tf.nn.softmax(self.logits_p) + Config.MIN_POLICY) / (1.0 + Config.MIN_POLICY * self.num_actions)
            self.selected_action_prob = tf.reduce_sum(self.softmax_p * self.action_index, axis=1)

            self.cost_p_1 = tf.log(tf.maximum(self.selected_action_prob, self.log_epsilon)) \
                        * (self.y_r - tf.stop_gradient(self.logits_v))
            self.cost_p_2 = -1 * self.var_beta * \
                        tf.reduce_sum(tf.log(tf.maximum(self.softmax_p, self.log_epsilon)) *
                                      self.softmax_p, axis=1)
        '''
        self.cost_all = tf.losses.mean_squared_error(self.y, self.d2)
        self.opt = tf.train.RMSPropOptimizer(
            learning_rate=self.var_learning_rate,
            decay=Config.RMSPROP_DECAY,
            momentum=Config.RMSPROP_MOMENTUM,
            epsilon=Config.RMSPROP_EPSILON)

        if Config.USE_GRAD_CLIP:
            self.opt_grad = self.opt.compute_gradients(self.cost_all)
            self.opt_grad_clipped = [(tf.clip_by_average_norm(g, Config.GRAD_CLIP_NORM), v)
                                     for g, v in self.opt_grad]
            self.train_op = self.opt.apply_gradients(self.opt_grad_clipped)
        else:
            self.train_op = self.opt.minimize(self.cost_all, global_step=self.global_step)
Example #16
    def _get_optimizer(self):
        lr = symbf.get_scalar_var('learning_rate', 0.001, summary=True)
        opt = tf.train.AdamOptimizer(lr, epsilon=1e-3)

        gradprocs = [MapGradient(lambda grad: tf.clip_by_average_norm(grad, 0.1)),
                     SummaryGradient()]
        opt = optimizer.apply_grad_processors(opt, gradprocs)
        return opt
Example #19
    def optimizer(self):
        lr = tf.get_variable('learning_rate', initializer=0.001, trainable=False)
        opt = tf.train.AdamOptimizer(lr, epsilon=1e-3)

        gradprocs = [MapGradient(lambda grad: tf.clip_by_average_norm(grad, 0.1)),
                     SummaryGradient()]
        opt = optimizer.apply_grad_processors(opt, gradprocs)
        return opt
Example #20
 def optimizer(self):
     lr = tf.get_variable('learning_rate', initializer=self.learning_rate, trainable=False)
     # opt = tf.train.AdamOptimizer(lr, epsilon=1e-3)
     opt = tf.train.AdamOptimizer(lr)
      return optimizer.apply_grad_processors(
          opt, [
              # gradproc.GlobalNormClip(2.0),
              gradproc.MapGradient(lambda grad: tf.clip_by_average_norm(grad, 0.5)),
              gradproc.SummaryGradient(),
          ])
Example #21
 def _clip_grads(self, grads):
     if self.clip_norm_type == 'ignore':
         return grads
     elif self.clip_norm_type == 'global':
         return tf.clip_by_global_norm(grads, self.clip_norm)[0]
      elif self.clip_norm_type == 'avg':
          # clip_by_average_norm operates on a single tensor, so apply it per gradient.
          return [tf.clip_by_average_norm(g, self.clip_norm) for g in grads]
      elif self.clip_norm_type == 'local':
          return [tf.clip_by_norm(g, self.clip_norm) for g in grads]
      else:
          raise ValueError('unknown clip_norm_type: {}'.format(self.clip_norm_type))
Example #23
    def __train_ops(self):
        if Config.DUAL_RMSPROP:
            self.opt_p = tf.train.RMSPropOptimizer(
                learning_rate=self.var_learning_rate,
                decay=Config.RMSPROP_DECAY,
                momentum=Config.RMSPROP_MOMENTUM,
                epsilon=Config.RMSPROP_EPSILON)

            self.opt_v = tf.train.RMSPropOptimizer(
                learning_rate=self.var_learning_rate,
                decay=Config.RMSPROP_DECAY,
                momentum=Config.RMSPROP_MOMENTUM,
                epsilon=Config.RMSPROP_EPSILON)
        else:
            self.cost_all = self.cost_p + self.cost_v
            self.opt = tf.train.RMSPropOptimizer(
                learning_rate=self.var_learning_rate,
                decay=Config.RMSPROP_DECAY,
                momentum=Config.RMSPROP_MOMENTUM,
                epsilon=Config.RMSPROP_EPSILON)

        if Config.USE_GRAD_CLIP:
            if Config.DUAL_RMSPROP:
                self.opt_grad_v = self.opt_v.compute_gradients(self.cost_v)
                self.opt_grad_v_clipped = [
                    (tf.clip_by_norm(g, Config.GRAD_CLIP_NORM), v)
                    for g, v in self.opt_grad_v if not g is None
                ]
                self.train_op_v = self.opt_v.apply_gradients(
                    self.opt_grad_v_clipped)

                self.opt_grad_p = self.opt_p.compute_gradients(self.cost_p)
                self.opt_grad_p_clipped = [
                    (tf.clip_by_norm(g, Config.GRAD_CLIP_NORM), v)
                    for g, v in self.opt_grad_p if not g is None
                ]
                self.train_op_p = self.opt_p.apply_gradients(
                    self.opt_grad_p_clipped)
                self.train_op = [self.train_op_p, self.train_op_v]
            else:
                self.opt_grad = self.opt.compute_gradients(self.cost_all)
                self.opt_grad_clipped = [
                    (tf.clip_by_average_norm(g, Config.GRAD_CLIP_NORM), v)
                    for g, v in self.opt_grad
                ]
                self.train_op = self.opt.apply_gradients(self.opt_grad_clipped)
        else:
            if Config.DUAL_RMSPROP:
                self.train_op_v = self.opt_v.minimize(
                    self.cost_v, global_step=self.global_step)
                self.train_op_p = self.opt_p.minimize(
                    self.cost_p, global_step=self.global_step)
                self.train_op = [self.train_op_p, self.train_op_v]
            else:
                self.train_op = self.opt.minimize(self.cost_all,
                                                  global_step=self.global_step)
Example #24
 def _compute_current_gradients(self):
     if GoConfig.USE_GRAD_CLIP:
         if GoConfig.DUAL_RMSPROP:
             # value
             self.variables_to_train_v = self.get_trainable_variables(
                 'resnet_v2_50, OutputNet/logits_p, global_step')
             self.opt_grad_v = self.opt_v.compute_gradients(
                 self.cost_v, var_list=self.variables_to_train_v)
             self.opt_grad_v_clipped = [
                 (tf.clip_by_norm(g, GoConfig.GRAD_CLIP_NORM), v)
                 for g, v in self.opt_grad_v if not g is None
             ]
             self.tower_v_grads.append(self.opt_grad_v_clipped)
             # policy
             self.variables_to_train_p = self.get_trainable_variables(
                 'resnet_v2_50, OutputNet/logits_v, global_step')
             self.opt_grad_p = self.opt_p.compute_gradients(
                 self.cost_p, var_list=self.variables_to_train_p)
             self.opt_grad_p_clipped = [
                 (tf.clip_by_norm(g, GoConfig.GRAD_CLIP_NORM), v)
                 for g, v in self.opt_grad_p if not g is None
             ]
             self.tower_p_grads.append(self.opt_grad_p_clipped)
         else:
             # all: value + policy
             self.variables_to_train_all = self.get_trainable_variables(
                 'resnet_v2_50, global_step')
             self.opt_grad = self.opt.compute_gradients(
                 self.cost_all, var_list=self.variables_to_train_all)
             self.opt_grad_clipped = [
                 (tf.clip_by_average_norm(g, GoConfig.GRAD_CLIP_NORM), v)
                 for g, v in self.opt_grad
             ]
             self.tower_all_grads.append(self.opt_grad_clipped)
     else:
         if GoConfig.DUAL_RMSPROP:
             # value
             self.variables_to_train_v = self.get_trainable_variables(
                 'resnet_v2_50, OutputNet/logits_p, global_step')
             self.opt_grad_v = self.opt_v.compute_gradients(
                 self.cost_v, var_list=self.variables_to_train_v)
             self.tower_v_grads.append(self.opt_grad_v)
             # policy
             self.variables_to_train_p = self.get_trainable_variables(
                 'resnet_v2_50, OutputNet/logits_v, global_step')
             self.opt_grad_p = self.opt_p.compute_gradients(
                 self.cost_p, var_list=self.variables_to_train_p)
             self.tower_p_grads.append(self.opt_grad_p)
         else:
             # all: value + policy
             self.variables_to_train_all = self.get_trainable_variables(
                 'resnet_v2_50, global_step')
             self.opt_grad = self.opt.compute_gradients(
                 self.cost_all, var_list=self.variables_to_train_all)
             self.tower_all_grads.append(self.opt_grad)
Example #25
    def optimizer(self):
        lr = tf.get_variable('learning_rate', initializer=1e-3, trainable=False)
        # This will also put the summary in tensorboard, stat.json and print in terminal,
        # but this time without moving average
        tf.summary.scalar('lr', lr)
        # opt = tf.train.MomentumOptimizer(lr, 0.9)
        opt = tf.train.AdamOptimizer(lr)

        return optimizer.apply_grad_processors(
            opt, [gradproc.MapGradient(lambda grad: tf.clip_by_average_norm(grad, 0.5)),
                  gradproc.SummaryGradient()])
Example #26
    def testClipByAverageNormZero(self):
        # No clipping when the average norm of x is 0
        with self.test_session():
            x = tf.constant([0.0, 0.0, 0.0, 0.0, 0.0, 0.0], shape=[2, 3])
            # Average norm = 0, no changes
            np_ans = [[0.0, 0.0, 0.0], [0.0, 0.0, 0.0]]
            clip_norm = 0.9
            ans = tf.clip_by_average_norm(x, clip_norm)
            tf_ans = ans.eval()

        self.assertAllClose(np_ans, tf_ans)
Example #27
    def testClipByAverageNormNotClipped(self):
        # No clipping when clip_norm >= average norm of x (0.83333333 here)
        with self.test_session():
            x = tf.constant([-3.0, 0.0, 0.0, 4.0, 0.0, 0.0], shape=[2, 3])
            # Average norm of x = sqrt(3^2 + 4^2) / 6 = 0.83333333
            np_ans = [[-3.0, 0.0, 0.0], [4.0, 0.0, 0.0]]
            clip_norm = 0.9
            ans = tf.clip_by_average_norm(x, clip_norm)
            tf_ans = ans.eval()

        self.assertAllClose(np_ans, tf_ans)
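
These tests pin down the semantics: tf.clip_by_average_norm(t, clip_norm) rescales t only when its L2 norm divided by the number of elements exceeds clip_norm; here sqrt(3^2 + 4^2) / 6 = 5/6 ≈ 0.8333 < 0.9, so the tensor is unchanged. A hedged NumPy restatement of that rule (clip_by_average_norm_np is illustrative only):

import numpy as np

def clip_by_average_norm_np(t, clip_norm):
    # Scale t so that l2_norm(t) / t.size <= clip_norm; otherwise return t.
    avg_norm = np.linalg.norm(t) / t.size
    if avg_norm > clip_norm:
        return t * (clip_norm / avg_norm)
    return t
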
Example #30
 def get_gradients(self, loss_or_grads, params):
   """
   Note
   ----
   The returned gradients may contain None value
   """
   # check valid algorithm
   if self.algorithm is None or \
   not hasattr(self.algorithm, 'compute_gradients') or \
   not hasattr(self.algorithm, 'apply_gradients'):
     raise RuntimeError("Optimizer is None, or doesn't has attributes: "
                        "compute_gradients and apply_gradients.")
   with tf.variable_scope(self.name, reuse=tf.AUTO_REUSE) as scope:
     scope_name = scope.name
     # get the gradient
     grads_var = self.algorithm.compute_gradients(loss_or_grads,
                                                  var_list=params)
     grads_var = {g: v for g, v in grads_var if g is not None}
     grads = list(grads_var.keys())
     params = list(grads_var.values())
     # ====== clipnorm ====== #
     if self.clipnorm is not None:
       if self.clip_alg == 'norm':
         grads = [tf.clip_by_norm(g, self.clipnorm)
                  for g in grads]
       elif self.clip_alg == 'total_norm':
         grads, _ = tf.clip_by_global_norm(grads, self.clipnorm)
       elif self.clip_alg == 'avg_norm':
         grads = [tf.clip_by_average_norm(g, self.clipnorm)
                  for g in grads]
       else:
         raise ValueError("Unknown norm clipping algorithm: '%s'" % self.clip_alg)
     # ====== clipvalue ====== #
     if self.clipvalue is not None:
       grads = [tf.clip_by_value(g, -self.clipvalue, self.clipvalue)
                for g in grads]
     # ====== get final norm value ====== #
     self._norm = add_roles(tf.global_norm(grads, name="GradientNorm"),
                            GradientsNorm)
   # ====== setting Optimizer roles ====== #
   for v in get_all_variables(scope=scope_name):
     add_roles(v, roles=OptimizerVariable)
   return [(g, p) for g, p in zip(grads, params)]
Example #31
    def _buildOptimizer(self, 
                        learningRate, decay, momentum, epsilon, clipNorm):
        """
        - Creates a graph node for applying reducing the loss (self.applyGrads)
       
        learningRate : Learning rate to be applied to gradients 
        decay        : Discount for past gradients
        momentum     : Gradient momentum
        epsilon      : non zero offset
        clipNorm     : Maximum average norm gradient allowed
        """

        optimizer = tf.train.RMSPropOptimizer(
                                          learning_rate = learningRate
                                        , decay         = decay
                                        , momentum      = momentum
                                        , epsilon       = epsilon
                                    )
        
        grads = optimizer.compute_gradients(self.loss)  # assumes the loss tensor is stored on self
        clippedGrads = [(tf.clip_by_average_norm(grad, clipNorm),var) for grad,var in grads]
        self.applyGrads = optimizer.apply_gradients(clippedGrads)
Example #32
  def clip_gradients(grads_and_vars):
    """This method was migrated from GradientClipOptimizer which has been
       deprecated"""
    clip_method = th.clip_method
    bound = th.clip_threshold
    assert clip_method in ('norm', 'value', 'global_norm', 'avg_norm')

    if clip_method in ('norm', 'value', 'avg_norm'):
      if clip_method == 'norm':
        method = lambda g: tf.clip_by_norm(g, bound)
      elif clip_method == 'value':
        method = lambda g: tf.clip_by_value(g, -bound, bound)
      else:
        method = lambda g: tf.clip_by_average_norm(g, bound)
      grads_and_vars = [(method(grad), var) for grad, var in grads_and_vars]
    else:
      assert clip_method == 'global_norm'
      grads = [g for g, _ in grads_and_vars]
      clipped_grads, _ = tf.clip_by_global_norm(grads, bound)
      vars_ = [v for _, v in grads_and_vars]
      grads_and_vars = list(zip(clipped_grads, vars_))

    return grads_and_vars
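
The if/elif chain over per-gradient methods could equally be written as a dispatch table, which makes adding a clip method a one-line change; a sketch under the same bound variable:

# Equivalent dispatch-table formulation for the per-gradient clip methods.
per_grad_clippers = {
    'norm': lambda g: tf.clip_by_norm(g, bound),
    'value': lambda g: tf.clip_by_value(g, -bound, bound),
    'avg_norm': lambda g: tf.clip_by_average_norm(g, bound),
}
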
Example #33
  def _compute_gradients(self, loss, var_list=None):
    # Sanity check
    assert isinstance(loss, tf.Tensor)

    # Compute gradients using default method
    grads_and_vars = self._tf_optimizer.compute_gradients(
      loss, var_list=var_list)

    # Deal with NaN if necessary
    if hub.clip_nan_protection: grads_and_vars = [
      (self._deal_with_nan(grad), var) for grad, var in grads_and_vars]

    # Apply lr decay if necessary
    lr_decay = hub.clip_lr_multiplier
    if lr_decay < 1.0:
      assert lr_decay > 0
      grads_and_vars = [(grad * lr_decay, var) for grad, var in grads_and_vars]

    # Clip gradient if necessary
    if self._threshold > 0:
      bound = self._threshold
      if self._method in ('norm', 'value', 'avg_norm'):
        if self._method == 'norm':
          method = lambda g: tf.clip_by_norm(g, bound)
        elif self._method == 'value':
          method = lambda g: tf.clip_by_value(g, -bound, bound)
        else: method = lambda g: tf.clip_by_average_norm(g, bound)
        grads_and_vars = [(method(grad), var) for grad, var in grads_and_vars]
      else:
        assert self._method == 'global_norm'
        grads = [g for g, _ in grads_and_vars]
        clipped_grads, _ = tf.clip_by_global_norm(grads, self._threshold)
        vars_ = [v for _, v in grads_and_vars]
        grads_and_vars = list(zip(clipped_grads, vars_))

    return grads_and_vars
Example #34
 def get_gradient_processor(self):
     return [MapGradient(lambda grad: tf.clip_by_average_norm(grad, 0.1)),
             SummaryGradient()]
Example #35
    def _create_graph(self):
        self.x = tf.placeholder(tf.float32, [None, self.img_height, self.img_width, self.img_channels], name='X')
        self.odometry = tf.placeholder(tf.float32, [None, 7], name='odometry')

        self.y_r = tf.placeholder(tf.float32, [None], name='Yr')
        self.action_index = tf.placeholder(tf.float32, [None, self.num_actions])

        self.var_beta = tf.placeholder(tf.float32, name='beta', shape=[])
        self.var_learning_rate = tf.placeholder(tf.float32, name='lr', shape=[])

        self.global_step = tf.Variable(0, trainable=False, name='step')

        # As implemented in A3C paper
        self.n1 = self.conv2d_layer(self.x, 8, 16, 'conv11', strides=[1, 4, 4, 1])
        self.n2 = self.conv2d_layer(self.n1, 4, 32, 'conv12', strides=[1, 2, 2, 1])

        # _input = self.n2
        # flatten_input_shape = _input.get_shape()
        # nb_elements = flatten_input_shape[1] * flatten_input_shape[2] * flatten_input_shape[3]

        self.flat = tf.contrib.layers.flatten(self.n2)
        self.d1 = self.dense_layer(self.flat, 256, 'dense1')

        self.logits_v = tf.squeeze(self.dense_layer(self.d1, 1, 'logits_v', func=None), axis=[1])
        self.cost_v = 0.5 * tf.reduce_sum(tf.square(self.y_r - self.logits_v), axis=0)
        self.logits_p = self.dense_layer(self.d1, self.num_actions, 'logits_p')

        if Config.USE_LOG_SOFTMAX:
            self.softmax_p = tf.nn.softmax(self.logits_p)
            self.log_softmax_p = tf.nn.log_softmax(self.logits_p)
            self.log_selected_action_prob = tf.reduce_sum(self.log_softmax_p * self.action_index, axis=1)

            self.cost_p_1 = self.log_selected_action_prob * (self.y_r - tf.stop_gradient(self.logits_v))
            self.cost_p_2 = -1 * self.var_beta * \
                        tf.reduce_sum(self.log_softmax_p * self.softmax_p, axis=1)
        else:
            self.softmax_p = (tf.nn.softmax(self.logits_p) + Config.MIN_POLICY) / (1.0 + Config.MIN_POLICY * self.num_actions)
            self.selected_action_prob = tf.reduce_sum(self.softmax_p * self.action_index, axis=1)

            self.cost_p_1 = tf.log(tf.maximum(self.selected_action_prob, self.log_epsilon)) * (self.y_r - tf.stop_gradient(self.logits_v))
            self.cost_p_2 = -1 * self.var_beta * tf.reduce_sum(tf.log(tf.maximum(self.softmax_p, self.log_epsilon)) * self.softmax_p, axis=1)
        
        self.cost_p_1_agg = tf.reduce_sum(self.cost_p_1, axis=0)
        self.cost_p_2_agg = tf.reduce_sum(self.cost_p_2, axis=0)
        self.cost_p = -(self.cost_p_1_agg + self.cost_p_2_agg)
        
        if Config.DUAL_RMSPROP:
            self.opt_p = tf.train.RMSPropOptimizer(
                learning_rate=self.var_learning_rate,
                decay=Config.RMSPROP_DECAY,
                momentum=Config.RMSPROP_MOMENTUM,
                epsilon=Config.RMSPROP_EPSILON)

            self.opt_v = tf.train.RMSPropOptimizer(
                learning_rate=self.var_learning_rate,
                decay=Config.RMSPROP_DECAY,
                momentum=Config.RMSPROP_MOMENTUM,
                epsilon=Config.RMSPROP_EPSILON)
        else:
            self.cost_all = self.cost_p + self.cost_v
            self.opt = tf.train.RMSPropOptimizer(
                learning_rate=self.var_learning_rate,
                decay=Config.RMSPROP_DECAY,
                momentum=Config.RMSPROP_MOMENTUM,
                epsilon=Config.RMSPROP_EPSILON)

        if Config.USE_GRAD_CLIP:
            if Config.DUAL_RMSPROP:
                self.opt_grad_v = self.opt_v.compute_gradients(self.cost_v)
                self.opt_grad_v_clipped = [(tf.clip_by_norm(g, Config.GRAD_CLIP_NORM),v) 
                                            for g,v in self.opt_grad_v if not g is None]
                self.train_op_v = self.opt_v.apply_gradients(self.opt_grad_v_clipped)
            
                self.opt_grad_p = self.opt_p.compute_gradients(self.cost_p)
                self.opt_grad_p_clipped = [(tf.clip_by_norm(g, Config.GRAD_CLIP_NORM),v)
                                            for g,v in self.opt_grad_p if not g is None]
                self.train_op_p = self.opt_p.apply_gradients(self.opt_grad_p_clipped)
                self.train_op = [self.train_op_p, self.train_op_v]
            else:
                self.opt_grad = self.opt.compute_gradients(self.cost_all)
                self.opt_grad_clipped = [(tf.clip_by_average_norm(g, Config.GRAD_CLIP_NORM),v) for g,v in self.opt_grad]
                self.train_op = self.opt.apply_gradients(self.opt_grad_clipped)
        else:
            if Config.DUAL_RMSPROP:
                self.train_op_v = self.opt_v.minimize(self.cost_v, global_step=self.global_step)
                self.train_op_p = self.opt_p.minimize(self.cost_p, global_step=self.global_step)
                self.train_op = [self.train_op_p, self.train_op_v]
            else:
                self.train_op = self.opt.minimize(self.cost_all, global_step=self.global_step)
Example #36
                nthreads=1,
                random_crop=config.RANDOM_CROP).data_pipeline(1)
            static_inpainted_images = model.build_static_infer_graph(
                static_images, config, name='static_view/%d' % i)

    # training settings
    lr = tf.get_variable('lr',
                         shape=[],
                         trainable=False,
                         initializer=tf.constant_initializer(1e-4))
    d_optimizer = tf.train.AdamOptimizer(lr, beta1=0.5, beta2=0.9)
    g_optimizer = d_optimizer

    # gradient processor
    if config.GRADIENT_CLIP:
        gradient_processor = lambda grad_var: (tf.clip_by_average_norm(
            grad_var[0], config.GRADIENT_CLIP_VALUE), grad_var[1])
    else:
        gradient_processor = None

    # log dir
    log_prefix = 'model_logs/' + '_'.join([
        ng.date_uid(), socket.gethostname(), config.DATASET,
        'MASKED' if config.GAN_WITH_MASK else 'NORMAL',
        config.GAN, config.LOG_DIR])

    # train discriminator with secondary trainer, should initialize before primary trainer.
    discriminator_training_callback = ng.callbacks.SecondaryTrainer(
        pstep=1,
        optimizer=d_optimizer,
        var_list=d_vars,
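
The gradient_processor here is a function over a single (grad, var) pair; conceptually the trainer maps it across all pairs before applying them. A hedged sketch of that application, independent of neuralgym's internals (grads_and_vars is assumed):

# Hypothetical: how a per-pair gradient processor would be applied.
if gradient_processor is not None:
    grads_and_vars = [gradient_processor(gv) for gv in grads_and_vars]
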
Example #37
def train(params, output, train_eval_bundle, dev_bundle, batches_x, batches_y, key_array, embed_keys, train_x, train_y):
    with tf.Graph().as_default():
        x, y_, dropout, weights, biases, W_fc, b_fc, log_loss, correct_prediction = define_nn(params)
        if params['Adagrad']:
            # log_loss is the cross-entropy objective returned by define_nn above.
            train_step = tf.train.AdagradOptimizer(params['LEARNING_RATE']).minimize(log_loss)
        else:
            train_step = tf.train.AdamOptimizer(params['LEARNING_RATE']).minimize(log_loss)

        saver = tf.train.Saver(tf.all_variables())
        #run session
        output.write( 'Initializing session...\n\n')
        sess = tf.Session(config=tf.ConfigProto(inter_op_parallelism_threads=2,
                          intra_op_parallelism_threads=3, use_per_session_threads=True))
        sess.run(tf.initialize_all_variables())
        output.write( 'Running session...\n\n')
        output.write('setup time: %g\n'%(time.clock()))
        best_dev_accuracy = 0
        train_softmax = sum_prob(x, y_, train_eval_bundle, params, log_loss, dropout, sess)
        initial_accuracy = sum_prob(x, y_, train_eval_bundle, params, correct_prediction, dropout, sess)
        output.write("initial accuracy %g softmax%g \n"%(initial_accuracy, train_softmax))
        output.write('start time: ' + str(time.clock()) + '\n')
        time_index = time.clock()
        epoch_time = 0
        for i in range(params['EPOCHS']):
            params['epoch'] = i + 1
            for j in range(len(batches_x)):
                train_step.run(feed_dict={x: batches_x[j],
                                          y_: batches_y[j],
                                          dropout: params['TRAIN_DROPOUT']},
                                          session = sess)
                #apply l2 clipping to weights and biases
                with sess.as_default():
                    # print weights[0].eval()
                    if j == 0:
                        l2_loss = tf.div(tf.sqrt(tf.nn.l2_loss(weights[0])), tf.convert_to_tensor(2.0)).eval()
                        output.write('l2 loss is %g' %l2_loss)
                    check_l2 = tf.reduce_sum(weights[0]).eval()
                    # collect the clipped tensors; rebinding the loop variable
                    # would silently discard them
                    weights = [tf.clip_by_average_norm(W, params['L2_NORM_CONSTRAINT'])
                               for W in weights]
                    biases = [tf.clip_by_average_norm(b, params['L2_NORM_CONSTRAINT'])
                              for b in biases]
                    W_fc = tf.clip_by_average_norm(W_fc, params['L2_NORM_CONSTRAINT'])
                    b_fc = tf.clip_by_average_norm(b_fc, params['L2_NORM_CONSTRAINT'])
                    if float(check_l2) > float(tf.reduce_sum(weights[0]).eval()):
                        output.write('weights clipped\n')
            if params['BATCH_SIZE'] == 1:
                batches_x, batches_y = shuffle_in_unison(batches_x, batches_y)
            else:
                batches_x, batches_y = scramble_batches(train_x, train_y, params, embed_keys, train_eval_bundle[2], train_eval_bundle[3])
            train_softmax = sum_prob(x, y_, train_eval_bundle, params, log_loss, dropout, sess)

            train_accuracy = sum_prob(x, y_, train_eval_bundle, params, correct_prediction, dropout, sess)

            output.write("epoch %d, training accuracy %g, training softmax error %g \n"
                %(i, train_accuracy, train_softmax))

            dev_accuracy = sum_prob(x, y_, dev_bundle, params, correct_prediction, dropout, sess)
            dev_softmax = sum_prob(x, y_, dev_bundle, params, log_loss, dropout, sess)
            output.write("dev set accuracy %g, softmax %g \n"%(dev_accuracy, dev_softmax))

            if dev_accuracy > best_dev_accuracy:
                saver.save(sess, 'text_cnn_run' + params['OUTPUT_FILE_NAME'], global_step = params['epoch'])
                best_dev_accuracy = dev_accuracy

            if dev_accuracy < best_dev_accuracy - .02:
                #early stop if accuracy drops significantly
                break
            output.write('epoch time : ' + str(time.clock() - time_index))
            epoch_time += time.clock() - time_index
            time_index = time.clock()
            output.write('. elapsed: ' + str(time.clock()) + '\n')
        # if params['TEST']:
        #     output.write('Testing:\n')
        #     test_x, test_y = sort_examples_by_length(test_x, test_y)
        #     test_bundle = batch(test_x, test_y, params, embed_keys) + (len(test_y),)
        #     saver.restore
        #     test_accuracy = sum_prob(x, y_, test_bundle, params, correct_prediction, dropout, sess)
        #     output.write('Final test accuracy: %g' %test_accuracy)

        return epoch_time
Example #38
    def _create_graph(self):
        self.x = tf.placeholder(
            tf.float32, [None, self.player_count * self.player_dimension],
            name='X')
        self.y_r = tf.placeholder(tf.float32, [None], name='Yr')
        self.action_index = tf.placeholder(tf.float32,
                                           [None, self.num_actions])

        self.var_beta = tf.placeholder(tf.float32, name='beta', shape=[])
        self.var_learning_rate = tf.placeholder(tf.float32,
                                                name='lr',
                                                shape=[])

        self.global_step = tf.Variable(0, trainable=False, name='step')

        self.d1 = self.dense_layer(self.x, 128, 'dense1')
        self.d2 = self.dense_layer(self.d1, 16, 'dense2')
        self.d3 = self.dense_layer(self.d2, 128, 'dense3')

        self.logits_v = tf.squeeze(self.dense_layer(self.d3,
                                                    1,
                                                    'logits_v',
                                                    func=None),
                                   axis=[1])
        self.cost_v = 0.5 * tf.reduce_sum(tf.square(self.y_r - self.logits_v),
                                          axis=0)

        self.logits_p = self.dense_layer(self.d3,
                                         self.num_actions,
                                         'logits_p',
                                         func=None)
        if Config.USE_LOG_SOFTMAX:
            self.softmax_p = tf.nn.softmax(self.logits_p)
            self.log_softmax_p = tf.nn.log_softmax(self.logits_p)
            self.log_selected_action_prob = tf.reduce_sum(self.log_softmax_p *
                                                          self.action_index,
                                                          axis=1)

            self.cost_p_1 = self.log_selected_action_prob * (
                self.y_r - tf.stop_gradient(self.logits_v))
            self.cost_p_2 = -1 * self.var_beta * \
                        tf.reduce_sum(self.log_softmax_p * self.softmax_p, axis=1)
        else:
            self.softmax_p = (tf.nn.softmax(self.logits_p) + Config.MIN_POLICY
                              ) / (1.0 + Config.MIN_POLICY * self.num_actions)
            self.selected_action_prob = tf.reduce_sum(self.softmax_p *
                                                      self.action_index,
                                                      axis=1)

            self.cost_p_1 = tf.log(tf.maximum(self.selected_action_prob, self.log_epsilon)) \
                        * (self.y_r - tf.stop_gradient(self.logits_v))
            self.cost_p_2 = -1 * self.var_beta * \
                        tf.reduce_sum(tf.log(tf.maximum(self.softmax_p, self.log_epsilon)) *
                                      self.softmax_p, axis=1)

        self.cost_p_1_agg = tf.reduce_sum(self.cost_p_1, axis=0)
        self.cost_p_2_agg = tf.reduce_sum(self.cost_p_2, axis=0)
        self.cost_p = -(self.cost_p_1_agg + self.cost_p_2_agg)

        if Config.DUAL_RMSPROP:
            self.opt_p = tf.train.RMSPropOptimizer(
                learning_rate=self.var_learning_rate,
                decay=Config.RMSPROP_DECAY,
                momentum=Config.RMSPROP_MOMENTUM,
                epsilon=Config.RMSPROP_EPSILON)

            self.opt_v = tf.train.RMSPropOptimizer(
                learning_rate=self.var_learning_rate,
                decay=Config.RMSPROP_DECAY,
                momentum=Config.RMSPROP_MOMENTUM,
                epsilon=Config.RMSPROP_EPSILON)
        else:
            self.cost_all = self.cost_p + self.cost_v
            self.opt = tf.train.RMSPropOptimizer(
                learning_rate=self.var_learning_rate,
                decay=Config.RMSPROP_DECAY,
                momentum=Config.RMSPROP_MOMENTUM,
                epsilon=Config.RMSPROP_EPSILON)

        if Config.USE_GRAD_CLIP:
            if Config.DUAL_RMSPROP:
                self.opt_grad_v = self.opt_v.compute_gradients(self.cost_v)
                self.opt_grad_v_clipped = [
                    (tf.clip_by_norm(g, Config.GRAD_CLIP_NORM), v)
                    for g, v in self.opt_grad_v if not g is None
                ]
                self.train_op_v = self.opt_v.apply_gradients(
                    self.opt_grad_v_clipped)

                self.opt_grad_p = self.opt_p.compute_gradients(self.cost_p)
                self.opt_grad_p_clipped = [
                    (tf.clip_by_norm(g, Config.GRAD_CLIP_NORM), v)
                    for g, v in self.opt_grad_p if not g is None
                ]
                self.train_op_p = self.opt_p.apply_gradients(
                    self.opt_grad_p_clipped)
                self.train_op = [self.train_op_p, self.train_op_v]
            else:
                self.opt_grad = self.opt.compute_gradients(self.cost_all)
                self.opt_grad_clipped = [
                    (tf.clip_by_average_norm(g, Config.GRAD_CLIP_NORM), v)
                    for g, v in self.opt_grad
                ]
                self.train_op = self.opt.apply_gradients(self.opt_grad_clipped)
        else:
            if Config.DUAL_RMSPROP:
                self.train_op_v = self.opt_v.minimize(
                    self.cost_v, global_step=self.global_step)
                self.train_op_p = self.opt_p.minimize(
                    self.cost_p, global_step=self.global_step)
                self.train_op = [self.train_op_p, self.train_op_v]
            else:
                self.train_op = self.opt.minimize(self.cost_all,
                                                  global_step=self.global_step)
Example #39
         static_fnames = val_fnames[i:i+1]
         static_images = ng.data.DataFromFNames(
             static_fnames, config.IMG_SHAPES, nthreads=1,
             random_crop=config.RANDOM_CROP, random_flip=config.RANDOM_FLIP).data_pipeline(1)
         static_inpainted_images = model.build_static_infer_graph(
             static_images[0], config, name='static_view/%d' % i, exclusionmask=images[exclusionmask_index] if config.EXC_MASKS else None)
 # training settings
 lr = tf.get_variable(
     'lr', shape=[], trainable=False,
     initializer=tf.constant_initializer(1e-4))
 d_optimizer = tf.train.AdamOptimizer(lr, beta1=0.5, beta2=0.9)
 g_optimizer = d_optimizer
 # gradient processor
 if config.GRADIENT_CLIP:
     gradient_processor = lambda grad_var: (
         tf.clip_by_average_norm(grad_var[0], config.GRADIENT_CLIP_VALUE),
         grad_var[1])
 else:
     gradient_processor = None
 # log dir
 log_prefix = 'model_logs/' + '_'.join([
     ng.date_uid(), socket.gethostname(), config.DATASET,
     'MASKED' if config.GAN_WITH_MASK else 'NORMAL',
     config.GAN,config.LOG_DIR])
 # train discriminator with secondary trainer, should initialize before
 # primary trainer.
 discriminator_training_callback = ng.callbacks.SecondaryTrainer(
     pstep=1,
     optimizer=d_optimizer,
     var_list=d_vars,
     max_iters=5,
Example #40
  def _post_process_grad(self, grad, var, global_info):
    """
    :param tf.Tensor grad:
    :param tf.Variable var:
    :param WrapOptimizer._GetGlobalInfo global_info:
    :return: new grad, apply grad opts
    :rtype: tf.Tensor, dict[str]
    """
    updater_opts = self._get_updater_opts_from_var(var)

    accum_grad_multiple_num_steps = updater_opts.get(
      "accum_grad_multiple_step", self.config.int("accum_grad_multiple_step", 0))
    grad_noise = updater_opts.get("gradient_noise", self.config.float("gradient_noise", 0.0))
    grad_clip = updater_opts.get("gradient_clip", self.config.float("gradient_clip", 0.0))
    # E.g. https://github.com/openai/baselines/blob/master/baselines/deepq/simple.py:
    #   grad_norm_clipping=10 -> tf.clip_by_norm
    grad_clip_norm = updater_opts.get("gradient_clip_norm", self.config.float("gradient_clip_norm", 0.0))
    grad_clip_avg_norm = updater_opts.get("gradient_clip_avg_norm", self.config.float("gradient_clip_avg_norm", 0.0))
    grad_clip_global_norm = updater_opts.get(
      "gradient_clip_global_norm", self.config.float("gradient_clip_global_norm", 0.0))
    global_norm_tag = updater_opts.get(
      "global_norm_tag", self.config.value("global_norm_tag", None))
    grad_clip_global_norm_tag = updater_opts.get(
      "gradient_clip_global_norm_tag", self.config.value("gradient_clip_global_norm_tag", global_norm_tag))
    grad_norm_to_clip_to_zero = updater_opts.get(
      "grad_norm_to_clip_to_zero", self.config.float("grad_norm_to_clip_to_zero", 0.0))
    maximize_grad_norm = updater_opts.get("maximize_grad_norm", self.config.float("maximize_grad_norm", 0))

    if maximize_grad_norm:
      grad_ext = global_info.get_maximize_grad_norm_grad(maximize_grad_norm, var)
      if grad_ext is not None:
        grad += grad_ext

    if accum_grad_multiple_num_steps >= 1:
      grad = accum_grad_multiple_step(
        grad, var, train_step=self.global_train_step, num_accum_steps=accum_grad_multiple_num_steps)

    if updater_opts.get("debug_grad_summaries", self.config.bool_or_other("debug_grad_summaries", False)):
      from TFUtil import variable_summaries, get_base_name, reuse_name_scope_of_tensor
      with reuse_name_scope_of_tensor(grad, prefix="grads/"):
        variable_summaries(grad, name="grad_of_%s" % get_base_name(var))
      with reuse_name_scope_of_tensor(var, prefix="vars/"):
        variable_summaries(var, name=get_base_name(var))

    # Also see tf.contrib.layers.optimizers.optimize_loss() for reference.
    if grad_noise:
      assert grad_noise > 0
      from TFUtil import add_scaled_noise_to_gradients
      with tf.name_scope("grad_noise"):
        (grad, var), = add_scaled_noise_to_gradients([(grad, var)], grad_noise)
    if grad_clip:
      assert grad_clip > 0
      with tf.name_scope("grad_clip"):
        grad = tf.clip_by_value(grad, -grad_clip, grad_clip)
    if grad_clip_norm:
      assert grad_clip_norm > 0
      with tf.name_scope("grad_clip_norm"):
        grad = tf.clip_by_norm(grad, grad_clip_norm)
    if grad_clip_avg_norm:
      assert grad_clip_avg_norm > 0
      with tf.name_scope("grad_clip_avg_norm"):
        grad = tf.clip_by_average_norm(grad, grad_clip_avg_norm)
    if grad_clip_global_norm:
      assert grad_clip_global_norm > 0
      with tf.name_scope("grad_clip_global_norm"):
        grad = global_info.clip_by_global_norm(
          grad, clip_norm=grad_clip_global_norm, global_norm_tag=grad_clip_global_norm_tag)
    if updater_opts.get("gradient_nan_inf_filter", self.config.bool("gradient_nan_inf_filter", False)):
      from TFUtil import nan_to_num
      grad = nan_to_num(grad, nan_num=0.0, inf_num=0.0)
    if grad_norm_to_clip_to_zero:
      with tf.name_scope("grad_norm_to_clip_to_zero"):
        grad = global_info.set_zero_on_high_global_norm(
          grad, grad_norm_threshold=grad_norm_to_clip_to_zero, global_norm_tag=global_norm_tag)

    updater_opts.assert_all_read()

    opt_key, _ = self._get_optimizer_item_for_variable(var)
    apply_grad_opts = {
      "opt_key": opt_key, "accum_grad_multiple_num_steps": accum_grad_multiple_num_steps}
    return grad, apply_grad_opts
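
For orientation, the clipping-related options this updater reads, with keys taken from the code above and illustrative values (a zero disables the corresponding clip):

# Illustrative config values for the clipping options read above.
config_values = {
    'gradient_clip': 0.0,              # tf.clip_by_value bound
    'gradient_clip_norm': 0.0,         # per-tensor tf.clip_by_norm bound
    'gradient_clip_avg_norm': 0.1,     # tf.clip_by_average_norm bound
    'gradient_clip_global_norm': 0.0,  # global-norm clip across gradients
}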