def clip_by_norm(gvs, grad_norm_thresh, scope="grad_clip"):
    """
    Clip gradients by norm, and scope.

    Args:
        gvs: list of (gradient, variable) tuples
        grad_norm_thresh: norm threshold to clip to
        scope: name scope for the clip operations
    """
    def _clip(gvs):
        new_gvs = []
        for gv in gvs:
            # `if gv[0]` on a Tensor raises a TypeError; test against None explicitly.
            if gv[0] is not None:
                new_gvs.append((tf.clip_by_norm(gv[0], grad_norm_thresh), gv[1]))
            else:
                print("no gradient for %s" % gv[1].op.name)
                new_gvs.append(gv)
        return new_gvs

    if scope:
        with tf.name_scope(scope):
            return _clip(gvs)
    return _clip(gvs)
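# A hedged usage sketch of the helper above (TF1 graph mode; `loss` is an
# assumed pre-existing scalar tensor, the names are illustrative):
optimizer = tf.train.AdamOptimizer(1e-3)
gvs = optimizer.compute_gradients(loss)                  # [(grad, var), ...]
train_op = optimizer.apply_gradients(
    clip_by_norm(gvs, grad_norm_thresh=5.0))             # per-tensor norm clip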
def two_linear( self, xin, linear_size, residual, dropout_keep_prob, max_norm, batch_norm, dtype, idx ):
    """
    Make a bi-linear block with optional residual connection

    Args
        xin: the batch that enters the block
        linear_size: integer. The size of the linear units
        residual: boolean. Whether to add a residual connection
        dropout_keep_prob: float [0,1]. Keep probability for dropout
        max_norm: boolean. Whether to clip the weights to max norm 1
        batch_norm: boolean. Whether to do batch normalization
        dtype: type of the weights. Usually tf.float32
        idx: integer. Number of layer (for naming/scoping)
    Returns
        y: the batch after it leaves the block
    """

    with vs.variable_scope( "two_linear_"+str(idx) ) as scope:

        input_size = int(xin.get_shape()[1])

        # Linear 1
        w2 = tf.get_variable( name="w2_"+str(idx), initializer=kaiming, shape=[input_size, linear_size], dtype=dtype)
        b2 = tf.get_variable( name="b2_"+str(idx), initializer=kaiming, shape=[linear_size], dtype=dtype)
        w2 = tf.clip_by_norm(w2, 1) if max_norm else w2
        y = tf.matmul(xin, w2) + b2
        if batch_norm:
            y = tf.layers.batch_normalization(y, training=self.isTraining, name="batch_normalization1"+str(idx))

        y = tf.nn.relu( y )
        y = tf.nn.dropout( y, dropout_keep_prob )

        # Linear 2
        w3 = tf.get_variable( name="w3_"+str(idx), initializer=kaiming, shape=[linear_size, linear_size], dtype=dtype)
        b3 = tf.get_variable( name="b3_"+str(idx), initializer=kaiming, shape=[linear_size], dtype=dtype)
        w3 = tf.clip_by_norm(w3, 1) if max_norm else w3
        y = tf.matmul(y, w3) + b3

        if batch_norm:
            y = tf.layers.batch_normalization(y, training=self.isTraining, name="batch_normalization2"+str(idx))

        y = tf.nn.relu( y )
        y = tf.nn.dropout( y, dropout_keep_prob )

        # Residual every 2 blocks
        y = (xin + y) if residual else y

    return y
def create_variables_for_optimization(self):
    with tf.name_scope("optimization"):
        with tf.name_scope("masker"):
            self.mask = tf.sequence_mask(self.seq_len, self.num_step)
            self.mask = tf.reshape(tf.cast(self.mask, tf.float32), (-1,))
        if self.loss_function == "cross_entropy":
            self.pl_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
                logits=self.logit,
                labels=self.actions_flatten)
        elif self.loss_function == "l2":
            self.one_hot_actions = tf.one_hot(self.actions_flatten, self.num_actions)
            self.pl_loss = tf.reduce_mean((self.probs - self.one_hot_actions) ** 2,
                                          axis=1)
        else:
            raise ValueError("loss function type is not defined")

        self.pl_loss = tf.multiply(self.pl_loss, self.mask)
        self.pl_loss = tf.reduce_mean(tf.multiply(self.pl_loss, self.returns_flatten))
        self.entropy = tf.multiply(self.entropy, self.mask)
        self.entropy = tf.reduce_mean(self.entropy)
        self.loss = self.pl_loss - self.entropy_bonus * self.entropy
        self.trainable_variables = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES,
                                                     scope="policy_network")
        self.gradients = self.optimizer.compute_gradients(self.loss,
                                                          var_list=self.trainable_variables)
        # Guard against variables without gradients: tf.clip_by_norm(None, ...) raises.
        self.clipped_gradients = [(tf.clip_by_norm(grad, self.max_gradient), var)
                                  for grad, var in self.gradients if grad is not None]
        self.train_op = self.optimizer.apply_gradients(self.clipped_gradients,
                                                       self.global_step)
        self.grad_norm = tf.global_norm([grad for grad, var in self.gradients
                                         if grad is not None])
        self.var_norm = tf.global_norm(self.trainable_variables)
def _clip_gradients(self, grads_and_vars):
    """Clip gradients.
    Args:
        grads_and_vars (list): list of tuples of `(grads, vars)`
    Returns:
        clipped_grads_and_vars (list): list of tuples of `(clipped grads, vars)`
    """
    # TODO: Optionally add gradient noise
    clipped_grads_and_vars = []

    # Clip gradient norm; pairs whose gradient is None are dropped.
    for grad, var in grads_and_vars:
        if grad is not None:
            clipped_grads_and_vars.append(
                (tf.clip_by_norm(grad, clip_norm=self.clip_grad_norm), var))

    # Alternative: clip gradient values instead of norms.
    # for grad, var in grads_and_vars:
    #     if grad is not None:
    #         clipped_grads_and_vars.append(
    #             (tf.clip_by_value(grad,
    #                               clip_value_min=-self.clip_grad_norm,
    #                               clip_value_max=self.clip_grad_norm),
    #              var))

    # TODO: Add histograms for variables, gradients (norms)
    # self._tensorboard(trainable_vars)

    return clipped_grads_and_vars
def create_update_op_backup(self):
    optimizer = tf.train.MomentumOptimizer(self.config.learning_rate, self.config.momentum)
    #self.update_op = optimizer.minimize(self.loss)
    g_list = optimizer.compute_gradients(self.loss)
    # Skip variables without gradients; tf.clip_by_norm(None, ...) raises.
    g_list_new = [(tf.clip_by_norm(g, 5), v) for g, v in g_list if g is not None]
    # Alternative: zero out non-finite gradients before clipping
    # (tf.select is the pre-1.0 name of tf.where):
    # g_list_new = []
    # for g, v in g_list:
    #     g_not_finite = tf.logical_or(tf.is_nan(g), tf.is_inf(g))
    #     g = tf.select(g_not_finite, tf.zeros_like(g), g)
    #     g = tf.clip_by_norm(g, 5)
    #     g = tf.select(g_not_finite, 0.1*v, g)
    #     # ... or normalise the gradient manually:
    #     g = tf.convert_to_tensor(g)
    #     g_norm = tf.sqrt(tf.reduce_sum(tf.square(g)))
    #     g = tf.select(g_not_finite, 0.1*v, g*5/g_norm)
    #     g_list_new.append((g, v))
    self.update_op = optimizer.apply_gradients(g_list_new)
    return
def __init__(self, sess, pred_network, env, stat, conf, target_network=None): super(DeepQ, self).__init__(sess, pred_network, target_network, env, stat, conf) # Optimizer with tf.variable_scope('optimizer'): self.targets = tf.placeholder('float32', [None], name='target_q_t') self.actions = tf.placeholder('int64', [None], name='action') actions_one_hot = tf.one_hot(self.actions, self.env.action_size, 1.0, 0.0, name='action_one_hot') pred_q = tf.reduce_sum(self.pred_network.outputs * actions_one_hot, reduction_indices=1, name='q_acted') self.delta = self.targets - pred_q if self.max_delta and self.min_delta: self.delta = tf.clip_by_value(self.delta, self.min_delta, self.max_delta, name='clipped_delta') self.loss = tf.reduce_mean(tf.square(self.delta), name='loss') self.learning_rate_op = tf.maximum(self.learning_rate_minimum, tf.train.exponential_decay( self.learning_rate, self.stat.t_op, self.learning_rate_decay_step, self.learning_rate_decay, staircase=True)) optimizer = tf.train.RMSPropOptimizer( self.learning_rate_op, momentum=0.95, epsilon=0.01) grads_and_vars = optimizer.compute_gradients(self.loss) for idx, (grad, var) in enumerate(grads_and_vars): if grad is not None: grads_and_vars[idx] = (tf.clip_by_norm(grad, self.max_grad_norm), var) self.optim = optimizer.apply_gradients(grads_and_vars)
def dpg(q_max, a_max, dqda_clipping=None, clip_norm=False, name="DpgLearning"):
    """Implements the Deterministic Policy Gradient (DPG) loss as a TensorFlow Op.

    This op implements the loss for the `actor`; the `critic` can instead be
    updated by minimizing the `value_ops.td_learning` loss.

    See "Deterministic Policy Gradient Algorithms" by Silver, Lever, Heess,
    Degris, Wierstra, Riedmiller (http://proceedings.mlr.press/v32/silver14.pdf).

    Args:
      q_max: Tensor holding Q-values generated by Q network with the input of
        (state, a_max) pair, shape `[B]`.
      a_max: Tensor holding the optimal action, shape `[B, action_dimension]`.
      dqda_clipping: `int` or `float`, clips the gradient dqda element-wise
        between `[-dqda_clipping, dqda_clipping]`.
      clip_norm: Whether to perform dqda clipping on the vector norm of the last
        dimension, or component wise (default).
      name: name to prefix ops created within this op.

    Returns:
      A namedtuple with fields:

      * `loss`: a tensor containing the batch of losses, shape `[B]`.
      * `extra`: a namedtuple with fields:
          * `q_max`: Tensor holding the optimal Q values, `[B]`.
          * `a_max`: Tensor holding the optimal action, `[B, action_dimension]`.
          * `dqda`: Tensor holding the derivative dq/da, `[B, action_dimension]`.

    Raises:
      ValueError: If `q_max` doesn't depend on `a_max` or if `dqda_clipping <= 0`.
    """

    # DPG op.
    with tf.name_scope(name, values=[q_max, a_max]):

        # Calculate the gradient dq/da.
        dqda = tf.gradients([q_max], [a_max])[0]

        # Check that `q_max` depends on `a_max`.
        if dqda is None:
            raise ValueError("q_max needs to be a function of a_max")

        # Clip the gradient dq/da.
        if dqda_clipping is not None:
            if dqda_clipping <= 0:
                raise ValueError("dqda_clipping should be bigger than 0, {} found"
                                 .format(dqda_clipping))
            if clip_norm:
                dqda = tf.clip_by_norm(dqda, dqda_clipping, axes=-1)
            else:
                dqda = tf.clip_by_value(dqda, -1. * dqda_clipping, dqda_clipping)

        # target_a ensures the correct gradient is computed during backprop.
        # Stopping the gradient keeps it from flowing into the Q network, so
        # gradients only flow through the actor network.
        target_a = dqda + a_max
        target_a = tf.stop_gradient(target_a)
        loss = 0.5 * tf.reduce_sum(tf.square(target_a - a_max), axis=-1)
        return base_ops.LossOutput(
            loss, DPGExtra(q_max=q_max, a_max=a_max, dqda=dqda))
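# Why the `target_a` surrogate works (a hedged sanity check with toy names,
# TF1 graph mode): for loss = 0.5 * (stop_gradient(dqda + a) - a)^2, the
# gradient w.r.t. `a` is exactly -dqda, so minimizing the loss performs
# gradient *ascent* on Q through the actor.
import tensorflow as tf

a = tf.constant([1.0, 2.0])
q = tf.reduce_sum(tf.square(a))             # toy critic: dq/da = 2a
dqda = tf.gradients([q], [a])[0]
target_a = tf.stop_gradient(dqda + a)
surrogate = 0.5 * tf.reduce_sum(tf.square(target_a - a))
grad_a = tf.gradients([surrogate], [a])[0]  # equals -dqda

with tf.Session() as sess:
    print(sess.run([dqda, grad_a]))         # [array([2., 4.]), array([-2., -4.])]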
def __init__(self, optimizer, devices, input_placeholders, per_device_batch_size, build_loss, logdir, grad_norm_clipping=None): self.optimizer = optimizer self.devices = devices self.batch_size = per_device_batch_size * len(devices) self.per_device_batch_size = per_device_batch_size self.input_placeholders = input_placeholders self.build_loss = build_loss self.logdir = logdir # First initialize the shared loss network with tf.variable_scope(TOWER_SCOPE_NAME): self._shared_loss = build_loss(*input_placeholders) # Then setup the per-device loss graphs that use the shared weights self._batch_index = tf.placeholder(tf.int32) # Split on the CPU in case the data doesn't fit in GPU memory. with tf.device("/cpu:0"): data_splits = zip( *[tf.split(ph, len(devices)) for ph in input_placeholders]) self._towers = [] for device, device_placeholders in zip(self.devices, data_splits): self._towers.append(self._setup_device(device, device_placeholders)) avg = average_gradients([t.grads for t in self._towers]) if grad_norm_clipping: for i, (grad, var) in enumerate(avg): if grad is not None: avg[i] = (tf.clip_by_norm(grad, grad_norm_clipping), var) self._train_op = self.optimizer.apply_gradients(avg)
def train(lr, total_loss, global_step):
    # Compute gradients.
    #with tf.control_dependencies([loss_averages_op]):
    opt = tf.train.GradientDescentOptimizer(lr)
    grads = opt.compute_gradients(total_loss)

    # Add histograms for gradients and clip each gradient to norm 5.
    for i, (grad, var) in enumerate(grads):
        if grad is not None:
            tf.histogram_summary(var.op.name + '/gradients', grad)
            grads[i] = (tf.clip_by_norm(grad, 5), var)

    apply_gradient_op = opt.apply_gradients(grads, global_step=global_step)

    # Add histograms for trainable variables.
    for var in tf.trainable_variables():
        tf.histogram_summary(var.op.name, var)

    # Track the moving averages of all trainable variables.
    variable_averages = tf.train.ExponentialMovingAverage(
        MOVING_AVERAGE_DECAY, global_step)
    variables_averages_op = variable_averages.apply(tf.trainable_variables())

    with tf.control_dependencies([apply_gradient_op, variables_averages_op]):
        train_op = tf.no_op(name='train')

    return train_op
def build_model(self, mode, embedding_method):
    self.build_memory()
    # self.skip_model = skip.load_model()
    self.skip_model = None

    self.reg_loss = tf.multiply(tf.nn.l2_loss(self.T), self.gamma, name='regularization_loss')
    self.data_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=self.probs, labels=self.target, name='data_loss')
    self.loss = tf.add(self.reg_loss, self.data_loss, name='total_loss')
    self.average_loss = tf.reduce_mean(self.loss)
    self.opt = tf.train.GradientDescentOptimizer(self.lr)

    self.correct_prediction = tf.equal(self.target, tf.argmax(self.probs, 1))
    self.accuracy = tf.reduce_mean(tf.cast(self.correct_prediction, tf.float32))

    grads_and_vars = self.opt.compute_gradients(self.loss, self.params)
    clipped_grads_and_vars = [(tf.clip_by_norm(gv[0], 40), gv[1])
                              for gv in grads_and_vars]

    inc_op = self.global_step.assign_add(1)
    with tf.control_dependencies([inc_op]):
        self.apply_grad_op = self.opt.apply_gradients(clipped_grads_and_vars)

    self.saver = tf.train.Saver()

    # At inference mode
    if mode == 'inference':
        if embedding_method == 'word2vec':
            self.saver.restore(self.sess, './demo/MN_shortcut/model.ckpt')
        elif embedding_method == 'skip':
            print 'Restoring model from ./demo/MN_shortcut/skip_plot_40.ckpt'
            self.saver.restore(self.sess, './demo/MN_shortcut/skip_plot_40.ckpt')
    else:
        tf.initialize_all_variables().run()
def _init_train(self):
    readout = tf.stop_gradient(self.target_network.readout)
    # 0 if terminal, max(prediction) if not
    future_rewards = tf.reduce_max(readout, reduction_indices=[1,]) * (1 - self.terminals)
    tf.histogram_summary("rewards_future", future_rewards)

    wanted = self.rewards + self.settings['discount'] * future_rewards
    tf.histogram_summary("rewards_wanted", wanted)

    current = tf.reduce_sum(
        self.act_network.readout * self.action_mask,
        reduction_indices=[1,],
        name="rewards_current"
    )
    tf.histogram_summary("rewards_current", current)

    loss = tf.square(current - wanted)
    self.error = tf.reduce_sum(loss, name="prediction_error")
    tf.scalar_summary('error', self.error)

    grad_vars = self.settings['optimizer'].compute_gradients(self.error)
    # Compare against None explicitly: `if grad` on a Tensor raises a TypeError.
    clipped_grad_vars = [
        (tf.clip_by_norm(grad, 10) if grad is not None else None, var)
        for (grad, var) in grad_vars
    ]

    for grad, var in clipped_grad_vars:
        tf.histogram_summary(var.name, var)
        if grad is not None:
            tf.histogram_summary(var.name + "_clipgrad", grad)

    self.train_op = self.settings['optimizer'].apply_gradients(
        clipped_grad_vars, global_step=self.global_step)
def build_model(self):
    self.build_memory()

    # d*V mapping: project the output of the last hop (o^k + u^k).
    # [batch_size, edim] * [edim, nwords] => [batch_size, nwords]
    self.W = tf.Variable(tf.random_normal([self.edim, self.nwords], stddev=self.init_std))
    z = tf.matmul(self.hid[-1], self.W)

    # target is one-hot encoded with shape [batch_size, nwords]
    self.loss = tf.nn.softmax_cross_entropy_with_logits(logits=z, labels=self.target)

    self.lr = tf.Variable(self.current_lr)
    self.opt = tf.train.GradientDescentOptimizer(self.lr)

    params = [self.A, self.B, self.C, self.T_A, self.T_B, self.W]
    grads_and_vars = self.opt.compute_gradients(self.loss, params)
    # List of (gradient, variable) pairs, with each gradient clipped by norm.
    clipped_grads_and_vars = [(tf.clip_by_norm(gv[0], self.max_grad_norm), gv[1])
                              for gv in grads_and_vars]

    # Bump the global step first; the control dependency guarantees the
    # increment runs before the gradients are applied. Normally one would pass
    # global_step to apply_gradients, but the pairs were rebuilt for clipping,
    # so the increment is wired up explicitly here.
    inc = self.global_step.assign_add(1)
    with tf.control_dependencies([inc]):
        self.optim = self.opt.apply_gradients(clipped_grads_and_vars)

    tf.global_variables_initializer().run()
    self.saver = tf.train.Saver()
def make_accumulated_gradients(self): reset_accum_grads = [] new_grads_and_vars = [] # 1. Prepare accum_grads self.accum_grads = {} self.add_accum_grads = {} for step, network in enumerate(self.networks): grads_and_vars = self.global_optim.compute_gradients(network.total_loss, network.w.values()) _add_accum_grads = [] for grad, var in tuple(grads_and_vars): if grad is not None: shape = grad.get_shape().as_list() name = 'accum/%s' % "/".join(var.name.split(':')[0].split('/')[-3:]) if step == 0: self.accum_grads[name] = tf.Variable( tf.zeros(shape), trainable=False, name=name) global_v = global_var[re.sub(r'.*\/A3C_\d+\/', '', var.name)] new_grads_and_vars.append((tf.clip_by_norm(self.accum_grads[name].ref(), self.max_grad_norm), global_v)) reset_accum_grads.append(self.accum_grads[name].assign(tf.zeros(shape))) _add_accum_grads.append(tf.assign_add(self.accum_grads[name], grad)) # 2. Add gradient to accum_grads self.add_accum_grads[step] = tf.group(*_add_accum_grads)
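# A simplified, hedged sketch of the accumulate-then-apply pattern above
# (single network; `loss`, `optimizer` and `max_grad_norm` are assumed to
# exist): per-step gradients are summed into non-trainable buffers, the
# clipped accumulated gradients are applied once, and the buffers are reset.
tvars = tf.trainable_variables()
accum = [tf.Variable(tf.zeros_like(v.initialized_value()), trainable=False)
         for v in tvars]
grads = tf.gradients(loss, tvars)
accum_op = tf.group(*[a.assign_add(g)
                      for a, g in zip(accum, grads) if g is not None])
apply_op = optimizer.apply_gradients(
    [(tf.clip_by_norm(a, max_grad_norm), v) for a, v in zip(accum, tvars)])
reset_op = tf.group(*[a.assign(tf.zeros_like(a)) for a in accum])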
def make_tf_Linv(layer, V_shape, c_shape, lr, act=tf.nn.tanh): """ builds graph for layer-local training of V and c """ with tf.name_scope('layer'+str(layer)+'_inv') as scope: V = tf.get_variable(scope+'V', shape=V_shape, dtype=tf.float32, initializer=tf.orthogonal_initializer(0.95)) #V = tf.get_variable(scope+'V', shape=V_shape, dtype=tf.float32, initializer=tf.contrib.layers.xavier_initializer(uniform=True, seed=None, dtype=tf.float32)) c = tf.get_variable(scope+'c', shape=c_shape, dtype=tf.float32, initializer=tf.constant_initializer(0.)) W = tf.placeholder(tf.float32, shape=[V_shape[1], V_shape[0]], name='W') b = tf.placeholder(tf.float32, shape=[1, V_shape[0]], name='b') x_0 = tf.placeholder(tf.float32, shape=[None, V_shape[1]], name='input') fx = act(tf.matmul(x_0, W) + b) loss = 0.5*tf.reduce_mean((act(tf.matmul(fx, V) + c) - x_0)**2, name='loss') s1 = tf.summary.scalar('log_loss'+str(layer), tf.log(loss)) s2 = tf.summary.histogram('V'+str(layer), V) s3 = tf.summary.histogram('c'+str(layer), c) opt = tf.train.RMSPropOptimizer(lr) gvs = opt.compute_gradients(loss, var_list=[V, c]) sg = [tf.summary.scalar('norm_grad'+var.name[-3], tf.nn.l2_loss(grad)) for grad, var in gvs] # var.name = 'namescope/V:0' and we want just 'V' clipped_gvs = [(tf.clip_by_norm(grad, 100.), var) for grad, var in gvs] return opt.apply_gradients(clipped_gvs), tf.summary.merge([s1] + sg)
def adv_target_net2(input_images, clip_norm=1.5): with tf.variable_scope('adv_encoder') as scope: width = 32 height = 32 batch_size = 128 # code_length = 6000 input_images = input_images/255 # clip bound box mean, var = tf.nn.moments(input_images, axes=tuple(range(1,len(input_images.shape))), keep_dims=True) normed_input_images = (input_images-mean)/var # Convolutional layer 1 conv1 = tf.layers.conv2d(inputs=normed_input_images, filters=32, kernel_size=(5, 5), # kernel_initializer=tf.contrib.layers.xavier_initializer(), activation=tf.nn.leaky_relu, padding='SAME', name='adv_conv1') # maxpool layer1 maxpool1 = tf.layers.max_pooling2d(conv1, (3,3), (2,2), 'SAME') # Convolutional layer 2 conv2 = tf.layers.conv2d(inputs=maxpool1, filters=64, kernel_size=(5, 5), # kernel_initializer=tf.contrib.layers.xavier_initializer(), activation=tf.nn.leaky_relu, padding='SAME', name='adv_conv2') # maxpool layer2 maxpool2 = tf.layers.max_pooling2d(conv2, (3,3), (2,2), 'SAME') deconv1 = tf.layers.conv2d_transpose(maxpool2, 32, (5,5), (2,2), 'SAME', activation=tf.nn.leaky_relu, name='adv_deconv1') adv_mask = tf.layers.conv2d_transpose(deconv1, 3, (5,5), (2,2), 'SAME', activation=tf.nn.tanh, name='adv_deconv2') scaled_adv_mask = tf.clip_by_norm(adv_mask, clip_norm, axes=list(range(1,len(adv_mask.shape)))) adv_images = tf.clip_by_value(scaled_adv_mask+input_images,0,1) output_images = tf.reshape(adv_images, (batch_size, height, width, 3)) * 255.0 dif = adv_images - input_images tf.summary.image('adv_images', output_images) # Reconstruction L2 loss mean_square_error = tf.reduce_mean(tf.square(dif), axis=list(range(1,len(dif.shape)))) loss = tf.reduce_mean(mean_square_error, name='dis_loss') return loss, output_images
def create_variables(self):
    self.target_q_network = self.q_network.copy(scope="target_network")

    # FOR REGULAR ACTION SCORE COMPUTATION
    with tf.name_scope("taking_action"):
        self.observation = self.q_network.input_placeholder("observation")
        self.action_scores = tf.identity(self.q_network(self.observation), name="action_scores")
        tf.histogram_summary("action_scores", self.action_scores)
        self.predicted_actions = tf.argmax(self.action_scores, dimension=1, name="predicted_actions")

    with tf.name_scope("estimating_future_rewards"):
        # FOR PREDICTING TARGET FUTURE REWARDS
        self.next_observation = self.q_network.input_placeholder("next_observation")
        self.next_observation_mask = tf.placeholder(tf.float32, (None,), name="next_observation_mask")
        self.next_action_scores = self.target_q_network(self.next_observation)
        tf.histogram_summary("target_action_scores", self.next_action_scores)
        self.rewards = tf.placeholder(tf.float32, (None,), name="rewards")
        target_values = \
            tf.reduce_max(self.next_action_scores, reduction_indices=[1,]) * self.next_observation_mask
        self.future_rewards = self.rewards + self.discount_rate * target_values

    with tf.name_scope("q_value_prediction"):
        # FOR PREDICTION ERROR
        self.action_mask = tf.placeholder(tf.float32, self.q_network.output_shape(), name="action_mask")
        self.masked_action_scores = tf.reduce_sum(self.action_scores * self.action_mask,
                                                  reduction_indices=[1,])
        temp_diff = self.masked_action_scores - self.future_rewards
        self.prediction_error = tf.reduce_mean(tf.square(temp_diff))
        gradients = self.optimizer.compute_gradients(
            self.prediction_error, var_list=self.q_network.variables())
        for i, (grad, var) in enumerate(gradients):
            if grad is not None:
                gradients[i] = (tf.clip_by_norm(grad, 5), var)
        # Add histograms for gradients.
        for grad, var in gradients:
            tf.histogram_summary(var.name, var)
            if grad is not None:
                tf.histogram_summary(var.name + '/gradients', grad)
        self.train_op = self.optimizer.apply_gradients(gradients)

    # UPDATE TARGET NETWORK
    with tf.name_scope("target_network_update"):
        self.target_network_update = []
        for v_source, v_target in zip(self.q_network.variables(),
                                      self.target_q_network.variables()):
            # this is equivalent to target = (1-alpha) * target + alpha * source
            update_op = v_target.assign_sub(self.target_network_update_rate * (v_target - v_source))
            self.target_network_update.append(update_op)
        self.target_network_update = tf.group(*self.target_network_update)

    # summaries
    tf.scalar_summary("prediction_error", self.prediction_error)

    self.summarize = tf.merge_all_summaries()
    self.no_op1 = tf.no_op()
def build_model(self, reuse, dev, ntype):
    # Note: `with A, B:` enters both contexts; `with A and B:` would silently
    # discard the variable scope and only apply the device.
    with tf.variable_scope(self.name), tf.device(dev):
        if reuse:
            tf.get_variable_scope().reuse_variables()
            assert tf.get_variable_scope().reuse

        # Set inputs of networks
        self.minimap = tf.placeholder(tf.float32, [None, U.minimap_channel(), self.msize, self.msize], name='minimap')
        self.screen = tf.placeholder(tf.float32, [None, U.screen_channel(), self.ssize, self.ssize], name='screen')
        self.info = tf.placeholder(tf.float32, [None, self.isize], name='info')

        # Build networks
        net = build_net(self.minimap, self.screen, self.info, self.msize, self.ssize, len(actions.FUNCTIONS), ntype)
        self.spatial_action, self.non_spatial_action, self.value = net

        # Set targets and masks
        self.valid_spatial_action = tf.placeholder(tf.float32, [None], name='valid_spatial_action')
        self.spatial_action_selected = tf.placeholder(tf.float32, [None, self.ssize**2], name='spatial_action_selected')
        self.valid_non_spatial_action = tf.placeholder(tf.float32, [None, len(actions.FUNCTIONS)], name='valid_non_spatial_action')
        self.non_spatial_action_selected = tf.placeholder(tf.float32, [None, len(actions.FUNCTIONS)], name='non_spatial_action_selected')
        self.value_target = tf.placeholder(tf.float32, [None], name='value_target')

        # Compute log probability
        spatial_action_prob = tf.reduce_sum(self.spatial_action * self.spatial_action_selected, axis=1)
        spatial_action_log_prob = tf.log(tf.clip_by_value(spatial_action_prob, 1e-10, 1.))
        non_spatial_action_prob = tf.reduce_sum(self.non_spatial_action * self.non_spatial_action_selected, axis=1)
        valid_non_spatial_action_prob = tf.reduce_sum(self.non_spatial_action * self.valid_non_spatial_action, axis=1)
        valid_non_spatial_action_prob = tf.clip_by_value(valid_non_spatial_action_prob, 1e-10, 1.)
        non_spatial_action_prob = non_spatial_action_prob / valid_non_spatial_action_prob
        non_spatial_action_log_prob = tf.log(tf.clip_by_value(non_spatial_action_prob, 1e-10, 1.))
        self.summary.append(tf.summary.histogram('spatial_action_prob', spatial_action_prob))
        self.summary.append(tf.summary.histogram('non_spatial_action_prob', non_spatial_action_prob))

        # Compute losses, more details in https://arxiv.org/abs/1602.01783
        # Policy loss and value loss
        action_log_prob = self.valid_spatial_action * spatial_action_log_prob + non_spatial_action_log_prob
        advantage = tf.stop_gradient(self.value_target - self.value)
        policy_loss = - tf.reduce_mean(action_log_prob * advantage)
        value_loss = - tf.reduce_mean(self.value * advantage)
        self.summary.append(tf.summary.scalar('policy_loss', policy_loss))
        self.summary.append(tf.summary.scalar('value_loss', value_loss))

        # TODO: policy penalty
        loss = policy_loss + value_loss

        # Build the optimizer
        self.learning_rate = tf.placeholder(tf.float32, None, name='learning_rate')
        opt = tf.train.RMSPropOptimizer(self.learning_rate, decay=0.99, epsilon=1e-10)
        grads = opt.compute_gradients(loss)
        clipped_grad = []
        for grad, var in grads:
            self.summary.append(tf.summary.histogram(var.op.name, var))
            # Only summarize/clip gradients that exist; a None gradient would
            # make both tf.summary.histogram and tf.clip_by_norm raise.
            if grad is not None:
                self.summary.append(tf.summary.histogram(var.op.name+'/grad', grad))
                grad = tf.clip_by_norm(grad, 10.0)
            clipped_grad.append([grad, var])
        self.train_op = opt.apply_gradients(clipped_grad)
        self.summary_op = tf.summary.merge(self.summary)

        self.saver = tf.train.Saver(max_to_keep=100)
def testClipByNormClipped(self):
    # Norm clipping when clip_norm < 5
    with self.test_session():
        x = tf.constant([-3.0, 0.0, 0.0, 4.0, 0.0, 0.0], shape=[2, 3])
        # Norm of x = sqrt(3^2 + 4^2) = 5
        np_ans = [[-2.4, 0.0, 0.0], [3.2, 0.0, 0.0]]
        clip_norm = 4.0
        ans = tf.clip_by_norm(x, clip_norm)
        tf_ans = ans.eval()

        # The threshold can also be passed as a Tensor.
        clip_tensor = tf.constant(4.0)
        ans = tf.clip_by_norm(x, clip_tensor)
        tf_ans_tensor = ans.eval()

    self.assertAllClose(np_ans, tf_ans)
    self.assertAllClose(np_ans, tf_ans_tensor)
def create_grads(self, loss, exclude, network, global_network): vs = list(set(network.var.keys()) - exclude) gs = tf.gradients(loss, [network.var[v] for v in vs]) for i in xrange(len(gs)): if self.max_grad_norm > 0.: gs[i] = tf.clip_by_norm(gs[i], self.max_grad_norm) gs[i] /= self.n_threads return zip(gs, map(global_network.var.get, vs))
def clip_by_norm(tensor, clip_norm, axes=None, name=None): """Implement clip_by_norm in Tensorflow backend. See :func:`luchador.nn.ops.clip_by_norm` for the detail. """ _tensor = tf.clip_by_norm( tensor.unwrap(), clip_norm=clip_norm, axes=axes, name=name) return Tensor(tensor=_tensor, name=name)
def flatgrad(loss, var_list, clip_norm=None):
    grads = tf.gradients(loss, var_list)
    if clip_norm is not None:
        # Skip variables that received no gradient; tf.clip_by_norm(None) raises.
        grads = [tf.clip_by_norm(grad, clip_norm=clip_norm) if grad is not None else None
                 for grad in grads]
    return tf.concat(axis=0, values=[
        tf.reshape(grad if grad is not None else tf.zeros_like(v), [numel(v)])
        for (v, grad) in zip(var_list, grads)
    ])
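# A hedged usage sketch of `flatgrad` (illustrative names; TF1 graph mode;
# assumes `numel` from the same module). The result is a single flat vector,
# with zeros filled in for variables that receive no gradient, e.g. `b` below.
w = tf.Variable([[1.0, 2.0]])
b = tf.Variable([0.5])
loss = tf.reduce_sum(tf.square(w))            # independent of b
flat = flatgrad(loss, [w, b], clip_norm=1.0)  # shape [3]: clipped d/dw, then 0 for b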
def _init_optimizer(self):
    with tf.variable_scope('Optimizer'):
        self.global_step = tf.get_variable('global_step', shape=[], dtype=tf.int32,
                                           initializer=tf.constant_initializer(0), trainable=False)
        self.opt = tf.train.AdadeltaOptimizer(learning_rate=self.lr_ph, epsilon=1e-6)
        grads = self.opt.compute_gradients(self.loss)
        gradients, variables = zip(*grads)
        # Clip each gradient separately; a None entry would make clip_by_norm raise.
        capped_grads = [tf.clip_by_norm(g, self.grad_clip) if g is not None else None
                        for g in gradients]
        self.train_op = self.opt.apply_gradients(zip(capped_grads, variables),
                                                 global_step=self.global_step)
def clip_by_norm(gvs, grad_norm_thresh, scope="grad_clip"):
    """
    Clip gradients by norm, and scope.

    Args:
        gvs: list of (gradient, variable) tuples
        grad_norm_thresh: norm threshold to clip to
        scope: name scope for the clip operations
    """
    def _clip(gvs):
        # Drop pairs without a gradient. Compare against None explicitly:
        # `if gv[0]` on a Tensor raises a TypeError.
        return [(tf.clip_by_norm(gv[0], grad_norm_thresh), gv[1])
                for gv in gvs if gv[0] is not None]

    if scope:
        with tf.name_scope(scope):
            return _clip(gvs)
    return _clip(gvs)
def compute_gradients(self, loss, var_list=None, gate_gradients=1):
    grads_and_vars = self._optimizer.compute_gradients(
        loss, var_list=var_list, gate_gradients=gate_gradients)
    results = []
    for grad, var in grads_and_vars:
        if grad is not None:
            grad = tf.clip_by_norm(grad, self._clip)
        results.append((grad, var))
    return results
def minimize_and_clip(optimizer, objective, var_list, clip_val=10):
    """Minimize `objective` using `optimizer` w.r.t. variables in `var_list`,
    while ensuring the norm of the gradient for each variable is clipped to
    `clip_val`.
    """
    gradients = optimizer.compute_gradients(objective, var_list=var_list)
    for i, (grad, var) in enumerate(gradients):
        if grad is not None:
            gradients[i] = (tf.clip_by_norm(grad, clip_val), var)
    return optimizer.apply_gradients(gradients)
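# Hedged usage sketch (illustrative names; `q_loss` and `q_vars` are assumed
# to exist in the surrounding training code):
optimizer = tf.train.AdamOptimizer(learning_rate=1e-4)
train_op = minimize_and_clip(optimizer, q_loss, var_list=q_vars, clip_val=10)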
def testClipByNormNotClipped(self): # No norm clipping when clip_norm >= 5 with self.test_session(): x = tf.constant([-3.0, 0.0, 0.0, 4.0, 0.0, 0.0], shape=[2, 3]) # Norm of x = sqrt(3^2 + 4^2) = 5 np_ans = [[-3.0, 0.0, 0.0], [4.0, 0.0, 0.0]] clip_norm = 6.0 ans = tf.clip_by_norm(x, clip_norm) tf_ans = ans.eval() self.assertAllClose(np_ans, tf_ans)
def testClipByNormZero(self): # No norm clipping when norm = 0 with self.test_session(): x = tf.constant([0.0, 0.0, 0.0, 0.0, 0.0, 0.0], shape=[2, 3]) # Norm = 0, no changes np_ans = [[0.0, 0.0, 0.0], [0.0, 0.0, 0.0]] clip_norm = 6.0 ans = tf.clip_by_norm(x, clip_norm) tf_ans = ans.eval() self.assertAllClose(np_ans, tf_ans)
def apply(self, loss):
    trainable = tf.trainable_variables()
    self._grads_and_vars = self._optimizer.compute_gradients(loss, trainable)

    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(update_ops):
        # Ensures that we execute the update_ops before performing the train_step
        self._optimize_op = self._optimizer.minimize(loss,
                                                     global_step=self._global_step,
                                                     colocate_gradients_with_ops=True)

    # `var.assign(...)` only builds an op; its result must be kept and run.
    # Group the weight-clipping ops so the caller can execute them after each step.
    clip_ops = []
    for _, var in self._grads_and_vars:
        print(var.name)
        clip_ops.append(var.assign(tf.clip_by_norm(var, 2.0)))
    self._clip_op = tf.group(*clip_ops)
def _clip_gradients(self, grad): """Clips gradients if the hyperparameter `gradient_clip_norm` requires it. Sparse tensors, in the form of IndexedSlices returned for the gradients of embeddings, require special handling. Args: grad: Gradient Tensor, IndexedSlices, or None. Returns: Optionally clipped gradient. """ if grad is not None and self.hyperparams.gradient_clip_norm > 0: if isinstance(grad, tf.IndexedSlices): tmp = tf.clip_by_norm(grad.values, self.hyperparams.gradient_clip_norm) return tf.IndexedSlices(tmp, grad.indices, grad.dense_shape) else: return tf.clip_by_norm(grad, self.hyperparams.gradient_clip_norm) else: return grad
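# Why the IndexedSlices branch above matters (a minimal sketch, TF1 graph
# mode, illustrative names): gradients of embedding lookups arrive as sparse
# IndexedSlices, whose `.values` must be clipped and rewrapped, because
# tf.clip_by_norm expects a dense tensor.
embeddings = tf.Variable(tf.random_normal([100, 8]))
ids = tf.constant([3, 7])
loss = tf.reduce_sum(tf.square(tf.nn.embedding_lookup(embeddings, ids)))
grad = tf.gradients(loss, [embeddings])[0]
print(isinstance(grad, tf.IndexedSlices))  # True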
def testClipByNormClippedWithDim0(self): # Norm clipping when clip_norm < 5 with self.test_session(): x = tf.constant([-3.0, 0.0, 0.0, 4.0, 0.0, 3.0], shape=[2, 3]) # Norm of x[:, 0] = sqrt(3^2 + 4^2) = 5, x[:, 2] = 3 np_ans = [[-2.4, 0.0, 0.0], [3.2, 0.0, 3.0]] clip_norm = 4.0 ans = tf.clip_by_norm(x, clip_norm, [0]) tf_ans = ans.eval() self.assertAllClose(np_ans, tf_ans)
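# A NumPy reference for the semantics these tests exercise (an illustrative
# sketch, not TensorFlow's implementation): values are scaled by
# clip_norm / norm only where the norm over `axes` exceeds clip_norm.
import numpy as np

def clip_by_norm_np(x, clip_norm, axes=None):
    norm = np.sqrt(np.sum(np.square(x), axis=axes, keepdims=True))
    scale = clip_norm / np.maximum(norm, clip_norm)  # 1.0 wherever norm <= clip_norm
    return x * scale

x = np.array([[-3.0, 0.0, 0.0], [4.0, 0.0, 3.0]])
print(clip_by_norm_np(x, 4.0, axes=0))  # [[-2.4, 0., 0.], [3.2, 0., 3.]]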
Actions4Act_oh = tf.one_hot(Actions4Act, 4)
Act_A = Q(Act_S)
Command_A = tf.argmax(Act_A, axis=-1)
Act_Ap = Q(Act_Sp)
PL = tf.reduce_mean(
    tf.pow((Act_R + tf.reduce_max(Act_A) - tf.reduce_max(Act_Ap * Actions4Act_oh)), 2))  #Q

#Opt = tf.train.RMSPropOptimizer(1E-4, momentum=.0, centered=True).minimize(PL)
#Opt = tf.train.MomentumOptimizer(learning_rate=1E-6, momentum=.8).minimize(PL)
optimizer = tf.train.RMSPropOptimizer(1E-4, momentum=.9, centered=False)
gr, va = zip(*optimizer.compute_gradients(PL))
# Test each gradient (not the whole list) against None before clipping.
gr = [None if grad is None else tf.clip_by_norm(grad, 5.) for grad in gr]
Opt = optimizer.apply_gradients(zip(gr, va))

sess = tf.Session()
sess.run(tf.global_variables_initializer())
episode = 0
while (1):
    episode += 1
    Rp = 0.
    S = env.reset()  # (210, 160, 3)
    GameScore = 0
    Clives = 3
    Reward_cnt = 0.
    CuReward = 0.
    R_list, S_list = [], []
def construct_graph(self, sess):
    with sess.graph.as_default():
        # Set the random seed for tensorflow
        tf.set_random_seed(cfg.RNG_SEED)

        # Build the main computation graph
        layers = self.net.create_architecture(True)  # is_training flag: True

        # Define the loss
        loss = layers['total_loss']

        path_iter = self.pretrained_model.split('.ckpt')[0]
        iter_num = path_iter.split('_')[-1]

        # from iter_ckpt
        if cfg.TRAIN_MODULE_CONTINUE == 1:
            global_step = tf.Variable(int(iter_num), trainable=False)
        # from iter 0
        if cfg.TRAIN_MODULE_CONTINUE == 2:
            global_step = tf.Variable(0, trainable=False)

        #lr = tf.train.exponential_decay(cfg.TRAIN.LEARNING_RATE * 10, global_step, cfg.TRAIN.STEPSIZE * 5, cfg.TRAIN.GAMMA, staircase=True)
        # Here we use a cosine lr scheme with warm restarts instead:
        first_decay_steps = 80000  # 2 epochs
        lr = cosine_decay_restarts(cfg.TRAIN.LEARNING_RATE * 10, global_step,
                                   first_decay_steps, t_mul=2.0, m_mul=1.0, alpha=0.0)
        self.optimizer = tf.train.MomentumOptimizer(lr, cfg.TRAIN.MOMENTUM)

        # 1--Update_all_parameter, 2--Only_Update_D, 3--Update_H+O+SP,
        # 4--updating except classifiers of S(fc)
        list_var_to_update = []
        if cfg.TRAIN_MODULE_UPDATE == 1:
            list_var_to_update = tf.trainable_variables()
        if cfg.TRAIN_MODULE_UPDATE == 2:
            list_var_to_update = [
                var for var in tf.trainable_variables()
                if 'fc_binary' in var.name or 'binary_classification' in var.name
            ]
        if cfg.TRAIN_MODULE_UPDATE == 3:
            list_var_to_update = [
                var for var in tf.trainable_variables()
                if 'fc_binary' not in var.name or 'binary_classification' not in var.name
            ]
        if cfg.TRAIN_MODULE_UPDATE == 4:
            list_var_to_update = [
                var for var in tf.trainable_variables()
                if 'classification' not in var.name
            ]

        grads_and_vars = self.optimizer.compute_gradients(loss, list_var_to_update)
        capped_gvs = [(tf.clip_by_norm(grad, 1.), var) for grad, var in grads_and_vars]
        train_op = self.optimizer.apply_gradients(capped_gvs, global_step=global_step)

        self.saver = tf.train.Saver(max_to_keep=cfg.TRAIN.SNAPSHOT_KEPT)

        # Write the train and validation information to tensorboard
        self.writer = tf.summary.FileWriter(self.tbdir, sess.graph)

    return lr, train_op
def __init__(self, img_shape, train_mode=True, model_path=None, latent_dim=100, noise='uniform', batch_size=64, d_learning_rate=1e-4, g_learning_rate=3e-4, eps=1e-8, Wloss=False, Bn=True, Adam=True ): """ Wloss: true for using loss introduced in WGAN; default is vanilla GAN loss Bn: true for using batch normalization (also indicates no bias) Adam: true for using Adam optimizer; false for using rmsprop """ self.img_shape = img_shape self.train_mode = train_mode self.model_path = model_path self.H = img_shape[0] self.W = img_shape[1] self.C = img_shape[2] self.z_size = latent_dim self.batch_size = batch_size self.Wloss = Wloss self.Bn = Bn # build model self.DO_SHARE = None self.x_r = tf.placeholder(tf.float32, shape=[self.batch_size] + list(self.img_shape)) if noise == 'normal': z = tf.random_normal((self.batch_size, 1, 1, self.z_size), 0, 1) elif noise == 'uniform': z = tf.random_uniform((self.batch_size, 1, 1, self.z_size), -1, 1) self.x_g = self.generator(z) if self.Bn: yl_r = self.discriminator(self.x_r) self.DO_SHARE = True yl_g = self.discriminator(self.x_g) else: x = tf.concat(0, [self.x_r, self.x_g]) yl = self.discriminator(x) yl_r, yl_g = tf.split(0, 2, yl) if Wloss: self.d_loss = tf.reduce_mean(yl_r - yl_g, axis=0) self.g_loss = tf.reduce_mean(yl_g, axis=0) else: # Vanilla GAN loss self.d_loss = ganloss(yl_r) + ganloss(yl_g, 0.) # no smooth label for fake data by improved GAN paper self.g_loss = ganloss(yl_g) t_vars = tf.trainable_variables() self.d_vars = [var for var in t_vars if 'd_' in var.name] self.g_vars = [var for var in t_vars if 'g_' in var.name] if Adam: self.d_optimizer = tf.train.AdamOptimizer(d_learning_rate, beta1=0.5, beta2=0.999) d_grads = self.d_optimizer.compute_gradients(self.d_loss, self.d_vars) clip_d_grads = [(tf.clip_by_norm(grad, 5), var) for grad, var in d_grads if grad is not None] self.d_optimizer = self.d_optimizer.apply_gradients(clip_d_grads) self.g_optimizer = tf.train.AdamOptimizer(g_learning_rate, beta1=0.5, beta2=0.999) g_grads = self.g_optimizer.compute_gradients(self.g_loss, self.g_vars) clip_g_grads = [(tf.clip_by_norm(grad, 5), var) for grad, var in g_grads if grad is not None] self.g_optimizer = self.g_optimizer.apply_gradients(clip_g_grads) else: self.d_optimizer = tf.train.RMSPropOptimizer(d_learning_rate, decay=0.99, epsilon=eps) d_grads = self.d_optimizer.compute_gradients(self.d_loss, self.d_vars) #clip_d_grads = [(tf.clip_by_norm(grad, 5), var) for grad, var in d_grads if grad is not None] clip_d_grads = [(grad, var) for grad, var in d_grads if grad is not None] self.d_optimizer = self.d_optimizer.apply_gradients(clip_d_grads) self.g_optimizer = tf.train.RMSPropOptimizer(g_learning_rate, decay=0.99, epsilon=eps) g_grads = self.g_optimizer.compute_gradients(self.g_loss, self.g_vars) #clip_g_grads = [(tf.clip_by_norm(grad, 5), var) for grad, var in g_grads if grad is not None] clip_g_grads = [(grad, var) for grad, var in g_grads if grad is not None] self.g_optimizer = self.g_optimizer.apply_gradients(clip_g_grads) self.d_clip = [tf.assign(var, tf.clip_by_value(var, -0.01, 0.01)) for var in self.d_vars]
def corrections_func(mainPN, batch_size, trace_length, corrections=False,
                     cube=None, clip_lola_update_norm=False,
                     lola_correction_multiplier=1.0,
                     clip_lola_correction_norm=False,
                     clip_lola_actor_norm=False,
                     against_destabilizer_exploiter=False):
    """Computes corrections for policy gradients.

    Args:
    -----
        mainPN: list of policy/Q-networks
        batch_size: int
        trace_length: int
        corrections: bool (default: False)
            Whether policy networks should use corrections.
        cube: tf.Variable or None (default: None)
            If provided, should be constructed via `lola.utils.make_cube`.
            Used for variance reduction of the value estimation.
            When provided, the computation graph for corrections is faster to
            compile but is quite memory inefficient.
            When None, the variance reduction graph is constructed dynamically,
            takes a little longer to compile, but has a lower memory footprint.
    """
    # Not memory-efficient
    if cube is not None:
        ac_logp0 = tf.reshape(mainPN[0].log_pi_action_bs_t,
                              [batch_size, 1, trace_length])
        ac_logp1 = tf.reshape(mainPN[1].log_pi_action_bs_t,
                              [batch_size, trace_length, 1])
        mat_1 = tf.reshape(tf.squeeze(tf.matmul(ac_logp1, ac_logp0)),
                           [batch_size, 1, trace_length * trace_length])

        v_0 = tf.matmul(
            tf.reshape(mainPN[0].sample_reward, [batch_size, trace_length, 1]),
            mat_1)
        v_0 = tf.reshape(
            v_0, [batch_size, trace_length, trace_length, trace_length])

        v_1 = tf.matmul(
            tf.reshape(mainPN[1].sample_reward, [batch_size, trace_length, 1]),
            mat_1)
        v_1 = tf.reshape(
            v_1, [batch_size, trace_length, trace_length, trace_length])

        v_0 = 2 * tf.reduce_sum(v_0 * cube) / batch_size
        v_1 = 2 * tf.reduce_sum(v_1 * cube) / batch_size
    # Memory-efficient
    else:
        ac_logp0 = tf.reshape(mainPN[0].log_pi_action_bs_t,
                              [batch_size, trace_length])
        ac_logp1 = tf.reshape(mainPN[1].log_pi_action_bs_t,
                              [batch_size, trace_length])

        # Static exclusive cumsum
        ac_logp0_cumsum = [tf.constant(0.)]
        ac_logp1_cumsum = [tf.constant(0.)]
        for i in range(trace_length - 1):
            ac_logp0_cumsum.append(tf.add(ac_logp0_cumsum[-1], ac_logp0[:, i]))
            ac_logp1_cumsum.append(tf.add(ac_logp1_cumsum[-1], ac_logp1[:, i]))

        # Compute v_0 and v_1
        mat_cumsum = ac_logp0[:, 0] * ac_logp1[:, 0]
        v_0 = mat_cumsum * mainPN[0].sample_reward[:, 0]
        v_1 = mat_cumsum * mainPN[1].sample_reward[:, 0]
        for i in range(1, trace_length):
            mat_cumsum = tf.add(mat_cumsum, ac_logp0[:, i] * ac_logp1[:, i])
            mat_cumsum = tf.add(mat_cumsum, ac_logp0_cumsum[i] * ac_logp1[:, i])
            mat_cumsum = tf.add(mat_cumsum, ac_logp1_cumsum[i] * ac_logp0[:, i])
            v_0 = tf.add(v_0, mat_cumsum * mainPN[0].sample_reward[:, i])
            v_1 = tf.add(v_1, mat_cumsum * mainPN[1].sample_reward[:, i])
        v_0 = 2 * tf.reduce_sum(v_0) / batch_size
        if against_destabilizer_exploiter:
            v_1 = 2 * v_1 / batch_size
        else:
            v_1 = 2 * tf.reduce_sum(v_1) / batch_size

    mainPN[0].v_0_log = v_0
    mainPN[1].v_1_log = v_1

    actor_target_error_0 = (mainPN[0].target -
                            tf.stop_gradient(mainPN[0].value))
    v_0_pi_0 = 2 * tf.reduce_sum((actor_target_error_0 * mainPN[0].gamma_array) *
                                 mainPN[0].log_pi_action_bs_t) / batch_size
    v_0_pi_1 = 2 * tf.reduce_sum((actor_target_error_0 * mainPN[1].gamma_array) *
                                 mainPN[1].log_pi_action_bs_t) / batch_size

    actor_target_error_1 = (mainPN[1].target -
                            tf.stop_gradient(mainPN[1].value))
    v_1_pi_0 = 2 * tf.reduce_sum((actor_target_error_1 * mainPN[0].gamma_array) *
                                 mainPN[0].log_pi_action_bs_t) / batch_size
    v_1_pi_1 = 2 * tf.reduce_sum((actor_target_error_1 * mainPN[1].gamma_array) *
                                 mainPN[1].log_pi_action_bs_t) / batch_size

    mainPN[0].actor_target_error = actor_target_error_0
    mainPN[1].actor_target_error = actor_target_error_1
    mainPN[0].actor_loss = 
v_0_pi_0 mainPN[1].actor_loss = v_1_pi_1 mainPN[0].value_used_for_correction = v_0 mainPN[1].value_used_for_correction = v_1 v_0_grad_theta_0 = flatgrad(v_0_pi_0, mainPN[0].parameters) v_0_grad_theta_1 = flatgrad(v_0_pi_1, mainPN[1].parameters) v_1_grad_theta_0 = flatgrad(v_1_pi_0, mainPN[0].parameters) v_1_grad_theta_1 = flatgrad(v_1_pi_1, mainPN[1].parameters) mainPN[0].grad = v_0_grad_theta_0 mainPN[1].grad = v_1_grad_theta_1 mainPN[0].grad_sum = tf.math.reduce_sum(v_0_grad_theta_0) mainPN[1].grad_sum = tf.math.reduce_sum(v_1_grad_theta_1) mainPN[0].grad_v_1 = v_1_grad_theta_0 mainPN[1].grad_v_0 = v_0_grad_theta_1 if corrections: v_0_grad_theta_0_wrong = flatgrad(v_0, mainPN[0].parameters) if against_destabilizer_exploiter: # v_1_grad_theta_1_wrong_splits = [ flatgrad(v_1[i], mainPN[1].parameters) for i in range(batch_size)] # v_1_grad_theta_1_wrong = tf.stack(v_1_grad_theta_1_wrong_splits, axis=1) v_1_grad_theta_1_wrong = tf.vectorized_map( partial(flatgrad, var_list=mainPN[1].parameters), v_1) else: v_1_grad_theta_1_wrong = flatgrad(v_1, mainPN[1].parameters) param_len = v_0_grad_theta_0_wrong.get_shape()[0].value # param_len = -1 if against_destabilizer_exploiter: multiply0 = tf.matmul( tf.reshape(tf.stop_gradient(v_0_grad_theta_1), [1, param_len]), tf.reshape(v_1_grad_theta_1_wrong, [param_len, batch_size])) else: multiply0 = tf.matmul( tf.reshape(tf.stop_gradient(v_0_grad_theta_1), [1, param_len]), tf.reshape(v_1_grad_theta_1_wrong, [param_len, 1])) multiply1 = tf.matmul( tf.reshape(tf.stop_gradient(v_1_grad_theta_0), [1, param_len]), tf.reshape(v_0_grad_theta_0_wrong, [param_len, 1])) if against_destabilizer_exploiter: second_order0 = flatgrad(multiply0, mainPN[0].parameters) second_order0 = second_order0[:, None] # second_order0_splits = [flatgrad(multiply0[:, i], mainPN[0].parameters) for i in range(batch_size)] # second_order0 = tf.stack(second_order0_splits, axis=1) # second_order0 = tf.vectorized_map(partial(flatgrad, var_list=mainPN[0].parameters), multiply0[0, :]) # second_order0 = tf.reshape(second_order0, [param_len, batch_size]) else: second_order0 = flatgrad(multiply0, mainPN[0].parameters) second_order1 = flatgrad(multiply1, mainPN[1].parameters) mainPN[0].multiply0 = multiply0 mainPN[0].v_0_grad_01 = second_order0 mainPN[1].v_1_grad_10 = second_order1 mainPN[0].second_order = tf.math.reduce_sum(second_order0) mainPN[1].second_order = tf.math.reduce_sum(second_order1) if against_destabilizer_exploiter: second_order0 = tf.math.reduce_sum(second_order0, axis=1) second_order0 = (second_order0 * lola_correction_multiplier) second_order1 = (second_order1 * lola_correction_multiplier) if clip_lola_correction_norm: second_order0 = tf.clip_by_norm(second_order0, clip_lola_correction_norm, axes=None, name=None) second_order1 = tf.clip_by_norm(second_order1, clip_lola_correction_norm, axes=None, name=None) if clip_lola_actor_norm: v_0_grad_theta_0 = tf.clip_by_norm(v_0_grad_theta_0, clip_lola_actor_norm, axes=None, name=None) v_1_grad_theta_1 = tf.clip_by_norm(v_1_grad_theta_1, clip_lola_actor_norm, axes=None, name=None) delta_0 = v_0_grad_theta_0 + second_order0 delta_1 = v_1_grad_theta_1 + second_order1 if clip_lola_update_norm: delta_0 = tf.clip_by_norm(delta_0, clip_lola_update_norm, axes=None, name=None) delta_1 = tf.clip_by_norm(delta_1, clip_lola_update_norm, axes=None, name=None) mainPN[0].delta = delta_0 mainPN[1].delta = delta_1 else: mainPN[0].delta = v_0_grad_theta_0 mainPN[1].delta = v_1_grad_theta_1 # To prevent some logic about logging stuff mainPN[0].v_0_grad_01 
= tf.reduce_sum(v_0_grad_theta_0) * 0.0 mainPN[1].v_1_grad_10 = tf.reduce_sum(v_0_grad_theta_0) * 0.0
def build_graph(sess, dictionary, NUM_CLASSES, vocabulary_size, embedding_size,
                input_tensor_shape_arr, output_tensor_shape_arr,
                bucketwise_max_level_arr):
    #TODO: experiment with minval and maxval
    W = tf.Variable(tf.random_uniform([embedding_size, embedding_size], minval=-1.0, maxval=1.0),
                    name='rnn_w_general', dtype=tf.float32)
    V_in = tf.Variable(tf.random_uniform([vocabulary_size, embedding_size], minval=-1.0, maxval=1.0),
                       name='embedding_matrix_in', dtype=tf.float32)
    V_out = tf.Variable(tf.random_uniform([NUM_CLASSES, embedding_size], minval=-1.0, maxval=1.0),
                        name='embedding_matrix_out', dtype=tf.float32)
    b = tf.Variable(np.zeros((embedding_size)), name='bias_general', dtype=tf.float32)
    W_out = tf.Variable(tf.random_uniform([embedding_size, NUM_CLASSES], minval=-1.0, maxval=1.0),
                        name='w_out', dtype=tf.float32)
    b_out = tf.Variable(np.zeros((NUM_CLASSES)), name='bias_out', dtype=tf.float32)
    # input_shape_tensor = tf.placeholder(tf.int32, shape=[1])

    recursion_out_arr = []
    loss_arr = []
    train_step_op_arr = []
    input_tensor_arr = []
    output_tensor_arr = []

    print('Preparing graph...')
    for index, level_arr in tqdm(enumerate(bucketwise_max_level_arr)):
        if index > config.MAX_NUM_BUCKETS_TO_TRAIN:
            continue
        input_tensor_shape = input_tensor_shape_arr[index]
        output_tensor_shape = output_tensor_shape_arr[index]
        input_tensor = tf.placeholder(tf.int32, shape=input_tensor_shape)
        output_tensor = tf.placeholder(tf.int32, shape=output_tensor_shape)

        recursion_out, _ = runRecursiveGraph2(input_tensor, W, V_in, V_out, b,
                                              0, level_arr, 0)
        loss, logits = getLoss({
            'weights': W_out,
            'biases': b_out,
            'num_classes': NUM_CLASSES,
            'output_word': output_tensor,
            'network_output': recursion_out,
        })

        optimizer = tf.train.AdamOptimizer(1e-3)
        gradients, variables = zip(*optimizer.compute_gradients(loss))
        gradients = [
            None if gradient is None else tf.clip_by_norm(gradient, 5.0)
            for gradient in gradients
        ]
        train_step_op = optimizer.apply_gradients(zip(gradients, variables))

        #TODO: with selective initialisation, check whether a variable has
        # already been initialised.
        print("*************Initialising variables*****************")
        for v in tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES):
            print("Initialising " + v.op.name)
            sess.run(v.initializer)
        print("Uninitialised variables:")
        print(tf.report_uninitialized_variables())

        recursion_out_arr.append(recursion_out)
        loss_arr.append(loss)
        train_step_op_arr.append(train_step_op)
        input_tensor_arr.append(input_tensor)
        output_tensor_arr.append(output_tensor)

    return recursion_out_arr, \
        loss_arr, \
        train_step_op_arr, \
        input_tensor_arr, \
        output_tensor_arr
attn_dnn1 = attn_dnn0 with tf.name_scope("DNN_decode"): _pred , logits , _attn_map = dynamic_deconv(attn_dnn1,_embd,_embd_T,t_place) with tf.name_scope("Loss"): ce = tf.nn.softmax_cross_entropy_with_logits_v2(labels=ys_one_hot,logits=logits)*ys_mask ce = tf.reduce_sum(ce,axis=1,keepdims=False)/tf.cast(_ys_length,tf.float32) _loss = tf.reduce_mean(ce) with tf.name_scope("Train"): g_step = tf.Variable(0,dtype=tf.int32,trainable=False,name="Global_step") lr = tf.train.exponential_decay(3e-4,g_step,2000,0.95,staircase=True) opt = tf.train.AdamOptimizer(lr) allgrads = opt.compute_gradients(_loss) clip_grads = [ ( tf.clip_by_norm(grad,3) ,var) for grad , var in allgrads] _update = opt.apply_gradients(clip_grads,global_step=g_step) # _update = opt.minimize(_loss,global_step=g_step) _global_step_assign = tf.placeholder(tf.int32) assign_g_step = g_step.assign_add(_global_step_assign) # with tf.name_scope("Gradient"): # _t_position = tf.placeholder(tf.int32) # _p_position = _pred[0,_t_position] # xs_vector_gradnorm = get_grad_norm(_t_position,_p_position,x_vector) # dnn1_gradnorm = get_grad_norm(_t_position,_p_position,attn_dnn0) # dnn2_gradnorm = get_grad_norm(_t_position,_p_position,attn_dnn1) all_var = tf.trainable_variables() _init = tf.global_variables_initializer() saver = tf.train.Saver(max_to_keep=8,var_list=tf.trainable_variables()) # tf.summary.FileWriter(log_path,graph=g)
def __init__(self, model_name=None, session=None, learning_rate=None, optimizer=None, learning_decay_rate=None, filter_sizes=None, num_filters=None, max_sentence_length=None, num_classes=None, embeddings=None, new_embeddings=None, embedding_dim=None, vocabulary_size=None, static=None, max_l2_norm=None, regularization_lambda=None, dropout_keep_prob=None): if model_name == None: return self.model_name = model_name self.session = session self.learning_rate = learning_rate self.optimizer = optimizer self.dropout_keep_prob_train = dropout_keep_prob self.regularization_lambda = regularization_lambda ############### # # model definition self.input_x = tf.placeholder(shape=(None, max_sentence_length), dtype=tf.int32, name="input_x") self.input_y = tf.placeholder(shape=(None, num_classes), dtype=tf.float32, name="input_y") self.dropout_keep_prob = tf.placeholder(dtype=tf.float32, name="dropout_keep_prob") # ===== EMBEDDING LAYER self.embeddings_placeholder = tf.placeholder(tf.float32, shape=(vocabulary_size, embedding_dim)) self.embeddings = tf.Variable(self.embeddings_placeholder, trainable=not static) self.new_embeddings = tf.Variable(new_embeddings, trainable=True) self.all_embeddings = tf.concat([self.embeddings, self.new_embeddings], axis=0) self.embedded_words = tf.nn.embedding_lookup(self.all_embeddings, self.input_x) # ===== CONVOLUTIONAL LAYER self.input_x_expanded = tf.expand_dims(self.embedded_words, -1) self.pool_results = [] for i, filter_size in enumerate(filter_sizes): filter = tf.get_variable( "filter" + str(i), shape=(filter_size, embedding_dim, 1, num_filters), dtype=tf.float32, initializer=tf.contrib.layers.xavier_initializer()) if max_l2_norm != 0: filter = tf.clip_by_norm(filter, max_l2_norm) bias = tf.Variable(tf.constant(0.0, shape=(num_filters, ))) conv = tf.nn.conv2d( input=self. input_x_expanded, # [batch, in_height, in_width, in_channels] filter= filter, # [filter_height, filter_width, in_channels, out_channels] strides=[1, 1, 1, 1], padding="VALID") relu = tf.nn.relu(tf.nn.bias_add(conv, bias)) conv_dim = max_sentence_length - filter_size + 1 pooled = tf.nn.max_pool(relu, ksize=[1, conv_dim, 1, 1], strides=[1, 1, 1, 1], padding='VALID') self.pool_results.append(pooled) # FLATTENING LAYER num_filters_total = num_filters * len(filter_sizes) self.flat = tf.reshape(tf.concat(self.pool_results, 3), [-1, num_filters_total]) # DROPOUT LAYER self.dropout = tf.nn.dropout(self.flat, self.dropout_keep_prob) # FULLY CONNECTED LAYER W = tf.get_variable("W", shape=(num_filters_total, num_classes), dtype=tf.float32, initializer=tf.contrib.layers.xavier_initializer()) b = tf.Variable(tf.constant(0.1, shape=(num_classes, ))) self.output = tf.nn.xw_plus_b(self.dropout, W, b, name="output") self.predictions = tf.argmax(self.output, 1, name="predictions") losses = tf.nn.softmax_cross_entropy_with_logits(labels=self.input_y, logits=self.output) if regularization_lambda != 0: l2_loss = tf.nn.l2_loss(W) self.loss = tf.reduce_mean(tf.add( losses, tf.multiply(self.regularization_lambda, l2_loss)), name="loss") else: self.loss = tf.reduce_mean(losses, name="loss") # # ############### # optimization method self.optimizer = optimizer(learning_rate=self.learning_rate) # training operation self.train_op = self.optimizer.minimize(self.loss) # saver self.saver = tf.train.Saver() # initialize variables self.session.run(tf.global_variables_initializer(), feed_dict={self.embeddings_placeholder: embeddings})
def build_train(make_obs_ph, q_func, num_actions, optimizer,
                grad_norm_clipping=None, gamma=1.0, double_q=True,
                scope="deepq", reuse=None, param_noise=False,
                param_noise_filter_func=None):
    """Creates the train function.

    Parameters
    ----------
    make_obs_ph: str -> tf.placeholder or TfInput
        a function that takes a name and creates a placeholder of input with that name
    q_func: (tf.Variable, int, str, bool) -> tf.Variable
        the model that takes the following inputs:
            observation_in: object
                the output of observation placeholder
            num_actions: int
                number of actions
            scope: str
            reuse: bool
                should be passed to outer variable scope
        and returns a tensor of shape (batch_size, num_actions) with values of every action.
    num_actions: int
        number of actions
    optimizer: tf.train.Optimizer
        optimizer to use for the Q-learning objective.
    grad_norm_clipping: float or None
        clip gradient norms to this value. If None no clipping is performed.
    gamma: float
        discount rate.
    double_q: bool
        if true will use Double Q Learning (https://arxiv.org/abs/1509.06461).
        In general it is a good idea to keep it enabled.
    scope: str or VariableScope
        optional scope for variable_scope.
    reuse: bool or None
        whether or not the variables should be reused. To be able to reuse the
        scope must be given.
    param_noise: bool
        whether or not to use parameter space noise (https://arxiv.org/abs/1706.01905)
    param_noise_filter_func: tf.Variable -> bool
        function that decides whether or not a variable should be perturbed.
        Only applicable if param_noise is True. If set to None,
        default_param_noise_filter is used by default.

    Returns
    -------
    act: (tf.Variable, bool, float) -> tf.Variable
        function to select an action given an observation.
        See the top of the file for details.
    train: (object, np.array, np.array, object, np.array, np.array) -> np.array
        optimize the error in Bellman's equation.
        See the top of the file for details.
    update_target: () -> ()
        copy the parameters from optimized Q function to the target Q function.
        See the top of the file for details.
    debug: {str: function}
        a bunch of functions to print debug data like q_values.
    """
    if param_noise:
        act_f = build_act_with_param_noise(
            make_obs_ph, q_func, num_actions, scope=scope, reuse=reuse,
            param_noise_filter_func=param_noise_filter_func)
    else:
        act_f = build_act(make_obs_ph, q_func, num_actions, scope=scope, reuse=reuse)

    with tf.variable_scope(scope, reuse=reuse):
        # set up placeholders
        obs_t_input = make_obs_ph("obs_t")
        act_t_ph = tf.placeholder(tf.int32, [None], name="action")
        rew_t_ph = tf.placeholder(tf.float32, [None], name="reward")
        obs_tp1_input = make_obs_ph("obs_tp1")
        done_mask_ph = tf.placeholder(tf.float32, [None], name="done")
        importance_weights_ph = tf.placeholder(tf.float32, [None], name="weight")

        # q network evaluation
        q_t = q_func(obs_t_input.get(), num_actions, scope="q_func", reuse=True)  # reuse parameters from act
        q_func_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES,
                                        scope=tf.get_variable_scope().name + "/q_func")

        # target q network evaluation
        q_tp1 = q_func(obs_tp1_input.get(), num_actions, scope="target_q_func")
        target_q_func_vars = tf.get_collection(
            tf.GraphKeys.GLOBAL_VARIABLES,
            scope=tf.get_variable_scope().name + "/target_q_func")

        # q scores for actions which we know were selected in the given state. 
q_t_selected = tf.reduce_sum(q_t * tf.one_hot(act_t_ph, num_actions), 1) # compute estimate of best possible value starting from state at t + 1 if double_q: q_tp1_using_online_net = q_func(obs_tp1_input.get(), num_actions, scope="q_func", reuse=True) q_tp1_best_using_online_net = tf.argmax(q_tp1_using_online_net, 1) q_tp1_best = tf.reduce_sum( q_tp1 * tf.one_hot(q_tp1_best_using_online_net, num_actions), 1) else: q_tp1_best = tf.reduce_max(q_tp1, 1) q_tp1_best_masked = (1.0 - done_mask_ph) * q_tp1_best # compute RHS of bellman equation q_t_selected_target = rew_t_ph + gamma * q_tp1_best_masked # compute the error (potentially clipped) td_error = q_t_selected - tf.stop_gradient(q_t_selected_target) errors = U.huber_loss(td_error) weighted_error = tf.reduce_mean(importance_weights_ph * errors) # compute optimization op (potentially with gradient clipping) if grad_norm_clipping is not None: gradients = optimizer.compute_gradients(weighted_error, var_list=q_func_vars) for i, (grad, var) in enumerate(gradients): if grad is not None: gradients[i] = (tf.clip_by_norm(grad, grad_norm_clipping), var) optimize_expr = optimizer.apply_gradients(gradients) else: optimize_expr = optimizer.minimize(weighted_error, var_list=q_func_vars) # update_target_fn will be called periodically to copy Q network to target Q network update_target_expr = [] for var, var_target in zip( sorted(q_func_vars, key=lambda v: v.name), sorted(target_q_func_vars, key=lambda v: v.name)): update_target_expr.append(var_target.assign(var)) update_target_expr = tf.group(*update_target_expr) # Create callable functions train = U.function(inputs=[ obs_t_input, act_t_ph, rew_t_ph, obs_tp1_input, done_mask_ph, importance_weights_ph ], outputs=td_error, updates=[optimize_expr]) update_target = U.function([], [], updates=[update_target_expr]) q_values = U.function([obs_t_input], q_t) return act_f, train, update_target, {'q_values': q_values}
def _grad_clip_by_norm_grad(op, grad): _, norm = op.inputs return (tf.clip_by_norm(grad, norm), None)
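# The function above has the signature of a registered gradient: it clips the
# incoming gradient and passes None back for the norm input. A hedged sketch
# of the general pattern it belongs to (here with the threshold baked in,
# since tf.identity has a single input; all names are illustrative):
import tensorflow as tf

@tf.RegisterGradient("ClipGradByNorm5")
def _clip_grad_by_norm_5(op, grad):
    # Forward pass is identity; backward pass clips the gradient to norm 5.
    return tf.clip_by_norm(grad, 5.0)

def identity_with_clipped_grad(x):
    g = tf.get_default_graph()
    with g.gradient_override_map({"Identity": "ClipGradByNorm5"}):
        return tf.identity(x)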
def build_model(self, reuse, dev, ntype):
    # Note: `with A, B:` enters both contexts; `with A and B:` would silently
    # discard the variable scope and only apply the device.
    with tf.variable_scope(self.name), tf.device(dev):
        if reuse:
            tf.get_variable_scope().reuse_variables()
            assert tf.get_variable_scope().reuse

        # Set inputs of networks
        self.minimap = tf.placeholder(
            tf.float32, [None, U.minimap_channel(), self.msize, self.msize],
            name='minimap')
        self.screen = tf.placeholder(
            tf.float32, [None, U.screen_channel(), self.ssize, self.ssize],
            name='screen')
        self.info = tf.placeholder(tf.float32, [None, self.isize], name='info')

        # Build networks
        net = build_net(self.minimap, self.screen, self.info, self.msize,
                        self.ssize, len(actions.FUNCTIONS), ntype)
        self.spatial_action, self.non_spatial_action, self.value = net

        # Set targets and masks
        self.valid_spatial_action = tf.placeholder(
            tf.float32, [None], name='valid_spatial_action')
        self.spatial_action_selected = tf.placeholder(
            tf.float32, [None, self.ssize**2], name='spatial_action_selected')
        self.valid_non_spatial_action = tf.placeholder(
            tf.float32, [None, len(actions.FUNCTIONS)],
            name='valid_non_spatial_action')
        self.non_spatial_action_selected = tf.placeholder(
            tf.float32, [None, len(actions.FUNCTIONS)],
            name='non_spatial_action_selected')
        self.value_target = tf.placeholder(tf.float32, [None], name='value_target')

        # Compute log probability
        spatial_action_prob = tf.reduce_sum(self.spatial_action * self.spatial_action_selected, axis=1)
        spatial_action_log_prob = tf.log(
            tf.clip_by_value(spatial_action_prob, 1e-10, 1.))
        non_spatial_action_prob = tf.reduce_sum(
            self.non_spatial_action * self.non_spatial_action_selected, axis=1)
        valid_non_spatial_action_prob = tf.reduce_sum(
            self.non_spatial_action * self.valid_non_spatial_action, axis=1)
        valid_non_spatial_action_prob = tf.clip_by_value(
            valid_non_spatial_action_prob, 1e-10, 1.)
        non_spatial_action_prob = non_spatial_action_prob / valid_non_spatial_action_prob
        non_spatial_action_log_prob = tf.log(
            tf.clip_by_value(non_spatial_action_prob, 1e-10, 1.))
        self.summary.append(
            tf.summary.histogram('spatial_action_prob', spatial_action_prob))
        self.summary.append(
            tf.summary.histogram('non_spatial_action_prob', non_spatial_action_prob))

        # Compute losses, more details in https://arxiv.org/abs/1602.01783
        # Policy loss and value loss
        action_log_prob = self.valid_spatial_action * spatial_action_log_prob + non_spatial_action_log_prob
        advantage = tf.stop_gradient(self.value_target - self.value)
        policy_loss = -tf.reduce_mean(action_log_prob * advantage)
        value_loss = -tf.reduce_mean(self.value * advantage)
        self.summary.append(tf.summary.scalar('policy_loss', policy_loss))
        self.summary.append(tf.summary.scalar('value_loss', value_loss))

        # TODO: policy penalty
        loss = policy_loss + value_loss

        # Build the optimizer
        self.learning_rate = tf.placeholder(tf.float32, None, name='learning_rate')
        opt = tf.train.RMSPropOptimizer(self.learning_rate, decay=0.99, epsilon=1e-10)
        grads = opt.compute_gradients(loss)
        clipped_grad = []
        for grad, var in grads:
            self.summary.append(tf.summary.histogram(var.op.name, var))
            # Only summarize/clip gradients that exist; a None gradient would
            # make both tf.summary.histogram and tf.clip_by_norm raise.
            if grad is not None:
                self.summary.append(
                    tf.summary.histogram(var.op.name + '/grad', grad))
                grad = tf.clip_by_norm(grad, 10.0)
            clipped_grad.append([grad, var])
        self.train_op = opt.apply_gradients(clipped_grad)
        self.summary_op = tf.summary.merge(self.summary)

        self.saver = tf.train.Saver(max_to_keep=100)
def create_variables(self):
    # compute action from a state: a* = argmax_a Q(s_t,a)
    with tf.name_scope("predict_actions"):
        # raw state representation
        self.states = tf.placeholder(tf.float32, (None, self.state_dim),
                                     name="states")
        # initialize Q network
        with tf.variable_scope("q_network"):
            self.q_outputs = self.value_network(self.states, self.player)
        # predict actions from Q network
        self.action_scores = tf.identity(self.q_outputs, name="action_scores")
        tf.summary.histogram("action_scores", self.action_scores)
        self.predicted_actions = tf.argmax(self.action_scores, axis=1,
                                           name="predicted_actions")

    # estimate rewards using the next state: r(s_t,a_t) + argmax_a Q(s_{t+1}, a)
    with tf.name_scope("estimate_future_rewards"):
        self.next_states = tf.placeholder(tf.float32, (None, self.state_dim),
                                          name="next_states")
        self.next_state_mask = tf.placeholder(tf.float32, (None, ),
                                              name="next_state_masks")

        if self.double_q_learning:
            # reuse Q network for action selection
            with tf.variable_scope("q_network", reuse=True):
                self.q_next_outputs = self.value_network(
                    self.next_states, self.player)
            self.action_selection = tf.argmax(
                tf.stop_gradient(self.q_next_outputs), 1,
                name="action_selection")
            tf.summary.histogram("action_selection", self.action_selection)
            self.action_selection_mask = tf.one_hot(self.action_selection,
                                                    self.num_actions, 1, 0)
            # use target network for action evaluation
            with tf.variable_scope("target_network"):
                self.target_outputs = self.value_network(
                    self.next_states, self.player) * tf.cast(
                        self.action_selection_mask, tf.float32)
            self.action_evaluation = tf.reduce_sum(self.target_outputs,
                                                   axis=1)
            tf.summary.histogram("action_evaluation", self.action_evaluation)
            self.target_values = self.action_evaluation * self.next_state_mask
        else:
            # initialize target network
            with tf.variable_scope("target_network"):
                self.target_outputs = self.value_network(
                    self.next_states, self.player)
            # compute future rewards
            self.next_action_scores = tf.stop_gradient(self.target_outputs)
            self.target_values = tf.reduce_max(
                self.next_action_scores, axis=1) * self.next_state_mask
            tf.summary.histogram("next_action_scores",
                                 self.next_action_scores)

        self.rewards = tf.placeholder(tf.float32, (None, ), name="rewards")
        self.future_rewards = self.rewards + self.discount_factor * self.target_values

    # compute loss and gradients
    with tf.name_scope("compute_temporal_differences"):
        # compute temporal difference loss
        self.action_mask = tf.placeholder(tf.float32,
                                          (None, self.num_actions),
                                          name="action_mask")
        self.masked_action_scores = tf.reduce_sum(
            self.action_scores * self.action_mask, axis=1)
        self.temp_diff = self.masked_action_scores - self.future_rewards
        self.td_loss = tf.reduce_mean(tf.square(self.temp_diff))
        # regularization loss
        q_network_variables = tf.get_collection(
            tf.GraphKeys.TRAINABLE_VARIABLES, scope="q_network")
        self.reg_loss = self.reg_param * tf.reduce_sum(
            [tf.reduce_sum(tf.square(x)) for x in q_network_variables])
        # compute total loss and gradients
        self.loss = self.td_loss + self.reg_loss
        gradients = self.optimizer.compute_gradients(self.loss)
        # clip gradients by norm
        for i, (grad, var) in enumerate(gradients):
            if grad is not None:
                gradients[i] = (tf.clip_by_norm(grad, self.max_gradient), var)
        # add histograms for gradients.
for grad, var in gradients: tf.summary.histogram(var.name, var) if grad is not None: tf.summary.histogram(var.name + '/gradients', grad) self.train_op = self.optimizer.apply_gradients(gradients) # update target network with Q network with tf.name_scope("update_target_network"): self.target_network_update = [] # slowly update target network parameters with Q network parameters q_network_variables = tf.get_collection( tf.GraphKeys.TRAINABLE_VARIABLES, scope="q_network") target_network_variables = tf.get_collection( tf.GraphKeys.TRAINABLE_VARIABLES, scope="target_network") for v_source, v_target in zip(q_network_variables, target_network_variables): # this is equivalent to target = (1-alpha) * target + alpha * source update_op = v_target.assign_sub(self.target_update_rate * (v_target - v_source)) self.target_network_update.append(update_op) self.target_network_update = tf.group(*self.target_network_update) # scalar summaries tf.summary.scalar("td_loss", self.td_loss) tf.summary.scalar("reg_loss", self.reg_loss) tf.summary.scalar("total_loss", self.loss) tf.summary.scalar("exploration", self.exploration) self.summarize = tf.summary.merge_all() self.no_op = tf.no_op()
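# Sanity check of the soft-update identity used above: assign_sub computes
# t <- t - r * (t - s), which is algebraically (1 - r) * t + r * s, i.e. the
# Polyak/moving-average target update with rate r = target_update_rate.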
def _clip(self, vec): if self.hmc_clip <= 0: return vec return tf.clip_by_norm(vec, self.hmc_clip, axes=[1])
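# Hedged illustration of the `axes` argument used above: with axes=[1], each
# row of a 2-D tensor is clipped to the threshold independently, rather than
# rescaling the tensor as a whole (the values below are illustrative only):
import tensorflow as tf

x = tf.constant([[3.0, 4.0],   # row norm 5.0 -> rescaled to [0.6, 0.8]
                 [0.3, 0.4]])  # row norm 0.5 -> left unchanged
row_clipped = tf.clip_by_norm(x, 1.0, axes=[1])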
def _euler_q(self, q, p, eps, M): p = tf.reshape(p, tf.shape(q)) q_new = q + eps * (1 + tf.clip_by_norm(self.gx(p), 1)) / M return q_new
tf.compat.v1.summary.scalar(name='sum_step_loss', tensor=loss) total_loss = loss if params['reg_loss']: reg_loss = tf.reduce_sum(model.losses) total_loss += reg_loss tf.compat.v1.summary.scalar(name='regularization', tensor=reg_loss) tf.compat.v1.summary.scalar(name='total_loss', tensor=total_loss) tf.compat.v1.summary.scalar(name='lr', tensor=tf_vr_lr_in) opt = tf.compat.v1.train.AdamOptimizer(learning_rate=tf_vr_lr_in) if params['clip_grad']: gvs = opt.compute_gradients(total_loss) clipped_gvs = [(tf.clip_by_norm(grad, clip_norm=0.001), var) for (grad, var) in gvs] train_step = opt.apply_gradients(clipped_gvs) else: train_step = opt.minimize(total_loss) model.summary(print_fn=log.info) sess.initialize_variables() if params['pretf']: log.info('load a pre-trained model: {}'.format(params['pretf'])) ld_model = keras.models.load_model(params['pretf'], compile=False) model.set_weights(ld_model.get_weights()) if params['inittf']:
embeddings = np.array(total_data[5]) print("embedding_shape =", np.shape(embeddings)) print("voca_len = ", len(voca)) # model build cnn = model_build(sequence_length=len(x_train[0]), num_classes=len(y_train[0]), vocab_size=len(voca), embeddings=embeddings) sess = tf.Session(config=tf.ConfigProto(gpu_options=tf.GPUOptions(allow_growth=True))) saver = tf.train.Saver(tf.global_variables()) global_step = tf.Variable(0, trainable=False, name='global_step') optimizer = tf.train.AdamOptimizer(1e-3) grads_and_vars = optimizer.compute_gradients(cnn.loss) for i, (g, v) in enumerate(grads_and_vars): if g is not None: grads_and_vars[i] = (tf.clip_by_norm(g, l2_lambda()), v) train_op = optimizer.apply_gradients(grads_and_vars, global_step=global_step) ckpt = tf.train.get_checkpoint_state('./TRECmodel') devlen = int(len(x_train) * dev_ratio()) test_accuracy_ave = 0 test_accuracy_list = [] each_accuracy = [] each_loss = [] if model_variation() == 0: print("Model : rand") elif model_variation() == 1: print("Model : static") elif model_variation() == 2: print("Model : non-static")
def train(self, trial_batch_generator, train_params={}):
    """ Train the network.

    Arguments:
        trial_batch_generator (:class:`~psychrnn.tasks.task.Task` object or *Generator[tuple, None, None]*): the task to train on, or that task's batch_generator. If a task is passed in, task.:func:`batch_generator`() will be called to get the generator for the task to train on.
        train_params (dict, optional): Dictionary of training parameters containing the following possible keys:

            :Dictionary Keys:
                * **learning_rate** (*float, optional*) -- Sets the learning rate if the default optimizer is used. Default: .001
                * **training_iters** (*int, optional*) -- Number of iterations to train for. Default: 50000.
                * **loss_epoch** (*int, optional*) -- Compute and record loss every 'loss_epoch' epochs. Default: 10.
                * **verbosity** (*bool, optional*) -- If true, prints information as training progresses. Default: True.
                * **save_weights_path** (*str, optional*) -- Where to save the model after training. Default: None
                * **save_training_weights_epoch** (*int, optional*) -- Save training weights every 'save_training_weights_epoch' epochs. Weights are only actually saved if :data:`training_weights_path` is set. Default: 100.
                * **training_weights_path** (*str, optional*) -- What directory to save training weights into as training progresses. Default: None.
                * **curriculum** (`~psychrnn.backend.curriculum.Curriculum` *object, optional*) -- Curriculum to train on. If a curriculum object is provided, it overrides the trial_batch_generator argument. Default: None.
                * **optimizer** (`tf.compat.v1.train.Optimizer <https://www.tensorflow.org/api_docs/python/tf/compat/v1/train/Optimizer>`_ *object, optional*) -- What optimizer to use to compute gradients. Default: `tf.train.AdamOptimizer <https://www.tensorflow.org/api_docs/python/tf/compat/v1/train/AdamOptimizer>`_ (learning_rate=:data:`train_params`['learning_rate']` ).
                * **clip_grads** (*bool, optional*) -- If true, clip gradients by norm 1. Default: True
                * **fixed_weights** (*dict, optional*) -- By default all weights are allowed to train unless :data:`fixed_weights` or :data:`W_rec_train`, :data:`W_in_train`, or :data:`W_out_train` are set. Default: None. Dictionary of weights to fix (not allow to train) with the following optional keys:

                    Fixed Weights Dictionary Keys (in case of :class:`~psychrnn.backend.models.basic.Basic` and :class:`~psychrnn.backend.models.basic.BasicScan` implementations)
                        * **W_in** (*ndarray(dtype=bool, shape=(:attr:`N_rec`, :attr:`N_in` *)), optional*) -- True for input weights that should be fixed during training.
                        * **W_rec** (*ndarray(dtype=bool, shape=(:attr:`N_rec`, :attr:`N_rec` *)), optional*) -- True for recurrent weights that should be fixed during training.
                        * **W_out** (*ndarray(dtype=bool, shape=(:attr:`N_out`, :attr:`N_rec` *)), optional*) -- True for output weights that should be fixed during training.

                    :Note: In general, any key in the dictionary output by :func:`get_weights` can have a key in the fixed_weights matrix, however fixed_weights will only meaningfully apply to trainable matrices.

                * **performance_cutoff** (*float*) -- If :data:`performance_measure` is not ``None``, training stops as soon as performance_measure surpasses the performance_cutoff. Default: None.
                * **performance_measure** (*function*) -- Function to calculate the performance of the network using custom criteria. Default: None.

                    :Arguments:
                        * **trial_batch** (*ndarray(dtype=float, shape =(*:attr:`N_batch`, :attr:`N_steps`, :attr:`N_out` *))*): Task stimuli for :attr:`N_batch` trials.
* **trial_y** (*ndarray(dtype=float, shape =(*:attr:`N_batch`, :attr:`N_steps`, :attr:`N_out` *))*): Target output for the network on :attr:`N_batch` trials given the :data:`trial_batch`. * **output_mask** (*ndarray(dtype=bool, shape =(*:attr:`N_batch`, :attr:`N_steps`, :attr:`N_out` *))*): Output mask for :attr:`N_batch` trials. True when the network should aim to match the target output, False when the target output can be ignored. * **output** (*ndarray(dtype=bool, shape =(*:attr:`N_batch`, :attr:`N_steps`, :attr:`N_out` *))*): Output to compute the accuracy of. ``output`` as returned by :func:`psychrnn.backend.rnn.RNN.test`. * **epoch** (*int*): Current training epoch (e.g. perhaps the performance_measure is calculated differently early on vs late in training) * **losses** (*list of float*): List of losses from the beginning of training until the current epoch. * **verbosity** (*bool*): Passed in from :data:`train_params`. :Returns: *float* Performance, greater when the performance is better. Returns: tuple: * **losses** (*list of float*) -- List of losses, computed every :data:`loss_epoch` epochs during training. * **training_time** (*float*) -- Time spent training. * **initialization_time** (*float*) -- Time spent initializing the network and preparing to train. """ if not self.is_built: self.build() t0 = time() # -------------------------------------------------- # Extract params # -------------------------------------------------- learning_rate = train_params.get('learning_rate', .001) training_iters = train_params.get('training_iters', 50000) loss_epoch = train_params.get('loss_epoch', 10) verbosity = train_params.get('verbosity', True) save_weights_path = train_params.get('save_weights_path', None) save_training_weights_epoch = train_params.get( 'save_training_weights_epoch', 100) training_weights_path = train_params.get('training_weights_path', None) curriculum = train_params.get('curriculum', None) optimizer = train_params.get( 'optimizer', tf.compat.v1.train.AdamOptimizer(learning_rate=learning_rate)) clip_grads = train_params.get('clip_grads', True) fixed_weights = train_params.get( 'fixed_weights', None ) # array of zeroes and ones. One indicates to pin and not train that weight. performance_cutoff = train_params.get('performance_cutoff', None) performance_measure = train_params.get('performance_measure', None) if (performance_cutoff is not None and performance_measure is None) or (performance_cutoff is None and performance_measure is not None): raise UserWarning( "training will not be cutoff based on performance. Make sure both performance_measure and performance_cutoff are defined" ) if curriculum is not None: trial_batch_generator = curriculum.get_generator_function() if not isgenerator(trial_batch_generator): trial_batch_generator = trial_batch_generator.batch_generator() # -------------------------------------------------- # Make weights folder if it doesn't already exist. # -------------------------------------------------- if save_weights_path != None: if path.dirname(save_weights_path) != "" and not path.exists( path.dirname(save_weights_path)): makedirs(path.dirname(save_weights_path)) # -------------------------------------------------- # Make train weights folder if it doesn't already exist. 
# -------------------------------------------------- if training_weights_path != None: if path.dirname(training_weights_path) != "" and not path.exists( path.dirname(training_weights_path)): makedirs(path.dirname(training_weights_path)) # -------------------------------------------------- # Compute gradients # -------------------------------------------------- grads = optimizer.compute_gradients(self.reg_loss) # -------------------------------------------------- # Fixed Weights # -------------------------------------------------- if fixed_weights is not None: for i in range(len(grads)): (grad, var) = grads[i] name = var.name[len(self.name) + 1:-2] if name in fixed_weights.keys(): grad = tf.multiply(grad, (1 - fixed_weights[name])) grads[i] = (grad, var) # -------------------------------------------------- # Clip gradients # -------------------------------------------------- if clip_grads: grads = [(tf.clip_by_norm(grad, 1.0), var) if grad is not None else (grad, var) for grad, var in grads] # -------------------------------------------------- # Call the optimizer and initialize variables # -------------------------------------------------- optimize = optimizer.apply_gradients(grads) self.sess.run(tf.compat.v1.global_variables_initializer()) self.is_initialized = True # -------------------------------------------------- # Record training time for performance benchmarks # -------------------------------------------------- t1 = time() # -------------------------------------------------- # Training loop # -------------------------------------------------- epoch = 1 batch_size = next(trial_batch_generator)[0].shape[0] losses = [] if performance_cutoff is not None: performance = performance_cutoff - 1 while (epoch - 1) * batch_size < training_iters and ( performance_cutoff is None or performance < performance_cutoff): batch_x, batch_y, output_mask, _ = next(trial_batch_generator) self.sess.run(optimize, feed_dict={ self.x: batch_x, self.y: batch_y, self.output_mask: output_mask }) # -------------------------------------------------- # Output batch loss # -------------------------------------------------- if epoch % loss_epoch == 0: reg_loss = self.sess.run(self.reg_loss, feed_dict={ self.x: batch_x, self.y: batch_y, self.output_mask: output_mask }) losses.append(reg_loss) if verbosity: print("Iter " + str(epoch * batch_size) + ", Minibatch Loss= " + \ "{:.6f}".format(reg_loss)) # -------------------------------------------------- # Allow for curriculum learning # -------------------------------------------------- if curriculum is not None and epoch % curriculum.metric_epoch == 0: trial_batch, trial_y, output_mask, _ = next( trial_batch_generator) output, _ = self.test(trial_batch) if curriculum.metric_test(trial_batch, trial_y, output_mask, output, epoch, losses, verbosity): if curriculum.stop_training: break trial_batch_generator = curriculum.get_generator_function() # -------------------------------------------------- # Save intermediary weights # -------------------------------------------------- if epoch % save_training_weights_epoch == 0: if training_weights_path is not None: self.save(training_weights_path + str(epoch)) if verbosity: print("Training weights saved in file: %s" % training_weights_path + str(epoch)) # --------------------------------------------------- # Update performance value if necessary # --------------------------------------------------- if performance_measure is not None: trial_batch, trial_y, output_mask, _ = next( trial_batch_generator) output, _ = 
self.test(trial_batch) performance = performance_measure(trial_batch, trial_y, output_mask, output, epoch, losses, verbosity) if verbosity: print("performance: " + str(performance)) epoch += 1 t2 = time() if verbosity: print("Optimization finished!") # -------------------------------------------------- # Save final weights # -------------------------------------------------- if save_weights_path is not None: self.save(save_weights_path) if verbosity: print("Model saved in file: %s" % save_weights_path) # -------------------------------------------------- # Return losses, training time, initialization time # -------------------------------------------------- return losses, (t2 - t1), (t1 - t0)
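# Hedged usage sketch for the training loop above (the Task subclass and its
# constructor arguments are assumptions, not from this excerpt):
#   task = MyTask(N_batch=50)                     # a psychrnn Task subclass
#   losses, train_time, init_time = model.train(
#       task, {'learning_rate': 1e-3, 'clip_grads': True, 'loss_epoch': 10})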
def create_variables(self):
    # create network T by copying the source network N
    self.target_q_network = self.q_network.copy(scope="target_network")

    # compute the control action
    # FOR REGULAR ACTION SCORE COMPUTATION
    with tf.name_scope("taking_action"):
        # input: the state vector
        self.observation = tf.placeholder(tf.float32,
                                          (None, self.observation_size),
                                          name="observation")
        # compute a usefulness score for each action
        self.action_scores = tf.identity(self.q_network(self.observation),
                                         name="action_scores")
        tf.histogram_summary("action_scores", self.action_scores)
        # take the action with the highest score
        self.predicted_actions = tf.argmax(self.action_scores, dimension=1,
                                           name="predicted_actions")

    # estimate the future payoff
    with tf.name_scope("estimating_future_rewards"):
        # FOR PREDICTING TARGET FUTURE REWARDS
        # input: the future states
        self.next_observation = tf.placeholder(
            tf.float32, (None, self.observation_size),
            name="next_observation")
        # input: masks for the future states
        self.next_observation_mask = tf.placeholder(
            tf.float32, (None, ), name="next_observation_mask")
        # usefulness estimates
        self.next_action_scores = tf.stop_gradient(
            self.target_q_network(self.next_observation))
        tf.histogram_summary("target_action_scores", self.next_action_scores)
        # input: the rewards
        self.rewards = tf.placeholder(tf.float32, (None, ), name="rewards")
        # take the maximum usefulness estimate over actions
        target_values = tf.identity(
            tf.reduce_max(self.next_action_scores,
                          reduction_indices=[1, ]) * self.next_observation_mask,
            name="target_values")
        # r + DF * MAX(Q,s); see the Wikipedia article on Q-learning
        #self.future_rewards = self.rewards + self.discount_rate * target_values
        self.future_rewards = tf.identity(
            self.rewards + self.discount_rate * target_values,
            name="future_rewards")

    # training network N
    with tf.name_scope("q_value_prediction"):
        # FOR PREDICTION ERROR
        # input: action masks for the set of training examples
        self.action_mask = tf.placeholder(tf.float32,
                                          (None, self.num_actions),
                                          name="action_mask")
        # compute action usefulness values for the training examples
        self.masked_action_scores = tf.reduce_sum(
            self.action_scores * self.action_mask,
            reduction_indices=[1, ],
            name="masked_action_scores")
        # differences between current and future usefulness values
        # - (r + DF * MAX(Q,s) - Q[s',a'])
        #temp_diff = self.masked_action_scores - self.future_rewards
        temp_diff = tf.identity(self.masked_action_scores - self.future_rewards,
                                name="temp_diff")
        # the key step of training the network:
        # RMSProp minimizes the mean of the differences above
        self.prediction_error = tf.reduce_mean(tf.square(temp_diff),
                                               name="prediction_error")

        # RMSProp, step one: compute the gradients
        gradients = self.optimizer.compute_gradients(self.prediction_error)
        #def get_zero(): return tf.constant(0.0)
        #def get_perror(): return self.prediction_error
        #gradients = self.optimizer.compute_gradients(tf.cond(tf.is_nan(self.prediction_error), get_zero, get_perror))
        for i, (grad, var) in enumerate(gradients):
            if grad is not None:
                gradients[i] = (tf.clip_by_norm(grad, 5), var)
        # add histograms for gradients.
        for grad, var in gradients:
            tf.histogram_summary(var.name, var)
            if grad is not None:
                tf.histogram_summary(var.name + '/gradients', grad)
        # step two: optimize the network parameters
        self.train_op = self.optimizer.apply_gradients(gradients,
                                                       name="train_op")

    # this is where network T gets updated:
    # T = (1-alpha)*T + alpha*N
    # UPDATE TARGET NETWORK
    with tf.name_scope("target_network_update"):
        self.target_network_update = []
        for v_source, v_target in zip(self.q_network.variables(),
                                      self.target_q_network.variables()):
            # this is equivalent to target = (1-alpha) * target + alpha * source
            update_op = v_target.assign_sub(self.target_network_update_rate *
                                            (v_target - v_source))
            self.target_network_update.append(update_op)
        self.target_network_update = tf.group(*self.target_network_update,
                                              name="target_network_update")

    # summaries
    tf.scalar_summary("prediction_error", self.prediction_error)
    self.summarize = tf.merge_all_summaries()
    self.no_op1 = tf.no_op()
session_conf.gpu_options.allow_growth = True sess = tf.Session(config=session_conf) with sess.as_default(): stan_reader = StanfordReader(max_entities=5, batch_size=FLAGS.batch_size) # Define Training procedure global_step = tf.Variable(0, name="global_step", trainable=False) optimizer = tf.train.AdamOptimizer(learning_rate = FLAGS.learning_rate) # aggregation_method is an experimental feature introduced for faster gradient computation grads_and_vars = optimizer.compute_gradients(stan_reader.loss, aggregation_method = 2) clipped_grads = [] for g, v in grads_and_vars: if g is not None: clipped = tf.clip_by_norm(g, clip_norm=10.) clipped_grads.append((clipped, v)) train_op = optimizer.apply_gradients(clipped_grads, global_step=global_step) # Keep track of gradient values and sparsity (optional) grad_summaries = [] for g, v in grads_and_vars: if g is not None: grad_hist_summary = tf.histogram_summary("{}/grad/hist".format(v.name), g) sparsity_summary = tf.scalar_summary("{}/grad/sparsity".format(v.name), tf.nn.zero_fraction(g)) grad_summaries.append(grad_hist_summary) grad_summaries.append(sparsity_summary) grad_summaries_merged = tf.merge_summary(grad_summaries) # Output directory for models and summaries
def __init__(self, mode="train"): # Load vocabulary self.char2idx, self.idx2char = load_vocab() # Set phase is_training = True if mode == "train" else False # Graph # Data Feeding # x: Text. (N, Tx) # y: Reduced melspectrogram. (N, Ty//r, n_mels*r) # z: Magnitude. (N, Ty, n_fft//2+1) if mode == "train": self.x, self.y, self.z, self.fnames, self.num_batch = get_batch() elif mode == "eval": self.x = tf.placeholder(tf.int32, shape=(None, None)) self.y = tf.placeholder(tf.float32, shape=(None, None, hp.n_mels * hp.r)) self.z = tf.placeholder(tf.float32, shape=(None, None, 1 + hp.n_fft // 2)) self.fnames = tf.placeholder(tf.string, shape=(None, )) else: # Synthesize self.x = tf.placeholder(tf.int32, shape=(None, None)) self.y = tf.placeholder(tf.float32, shape=(None, None, hp.n_mels * hp.r)) # Get encoder/decoder inputs self.encoder_inputs = embed(self.x, len(hp.vocab), hp.embed_size) # (N, T_x, E) self.decoder_inputs = tf.concat( (tf.zeros_like(self.y[:, :1, :]), self.y[:, :-1, :]), 1) # (N, Ty/r, n_mels*r) self.decoder_inputs = self.decoder_inputs[:, :, -hp. n_mels:] # feed last frames only (N, Ty/r, n_mels) # Networks with tf.variable_scope("net"): # Encoder self.memory = encoder(self.encoder_inputs, is_training=is_training) # (N, T_x, E) # Decoder1 self.y_hat, self.alignments = decoder1( self.decoder_inputs, self.memory, is_training=is_training) # (N, T_y//r, n_mels*r) # Decoder2 or postprocessing self.z_hat = decoder2( self.y_hat, is_training=is_training) # (N, T_y//r, (1+n_fft//2)*r) # monitor self.audio = tf.py_func(spectrogram2wav, [self.z_hat[0]], tf.float32) if mode in ("train", "eval"): # Loss self.loss1 = tf.reduce_mean(tf.abs(self.y_hat - self.y)) self.loss2 = tf.reduce_mean(tf.abs(self.z_hat - self.z)) self.loss = self.loss1 + self.loss2 # Training Scheme self.global_step = tf.Variable(0, name='global_step', trainable=False) self.lr = learning_rate_decay(hp.lr, global_step=self.global_step) self.optimizer = tf.train.AdamOptimizer(learning_rate=self.lr) ## gradient clipping self.gvs = self.optimizer.compute_gradients(self.loss) self.clipped = [] for grad, var in self.gvs: grad = tf.clip_by_norm(grad, 5.) self.clipped.append((grad, var)) self.train_op = self.optimizer.apply_gradients( self.clipped, global_step=self.global_step) # Summary tf.summary.scalar('{}/loss1'.format(mode), self.loss1) tf.summary.scalar('{}/loss'.format(mode), self.loss) tf.summary.scalar('{}/lr'.format(mode), self.lr) tf.summary.image("{}/mel_gt".format(mode), tf.expand_dims(self.y, -1), max_outputs=1) tf.summary.image("{}/mel_hat".format(mode), tf.expand_dims(self.y_hat, -1), max_outputs=1) tf.summary.image("{}/mag_gt".format(mode), tf.expand_dims(self.z, -1), max_outputs=1) tf.summary.image("{}/mag_hat".format(mode), tf.expand_dims(self.z_hat, -1), max_outputs=1) tf.summary.audio("{}/sample".format(mode), tf.expand_dims(self.audio, 0), hp.sr) self.merged = tf.summary.merge_all()
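# Note: the clipping loop above assumes every variable receives a gradient;
# if any entry returned by compute_gradients were None, tf.clip_by_norm would
# raise. A hedged, None-safe variant of the same loop:
#   self.clipped = [(tf.clip_by_norm(g, 5.) if g is not None else g, v)
#                   for g, v in self.gvs]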
def __init__(self, batch_size, vocab_size, sentence_size, memory_size, embedding_size, hops=3, max_grad_norm=40.0, nonlin=None, initializer=tf.random_normal_initializer(stddev=0.1), encoding=position_encoding, session=tf.Session(config=tf.ConfigProto( gpu_options=tf_gpu_options)), l2=0.02, lr=0.01, epsilon=1e-8, restoreLoc=None, name='MemN2N'): """Creates an End-To-End Memory Network Args: batch_size: The size of the batch. vocab_size: The size of the vocabulary (should include the nil word). The nil word one-hot encoding should be 0. sentence_size: The max size of a sentence in the data. All sentences should be padded to this length. If padding is required it should be done with nil one-hot encoding (0). memory_size: The max size of the memory. Since Tensorflow currently does not support jagged arrays all memories must be padded to this length. If padding is required, the extra memories should be empty memories; memories filled with the nil word ([0, 0, 0, ......, 0]). embedding_size: The size of the word embedding. hops: The number of hops. A hop consists of reading and addressing a memory slot. Defaults to `3`. max_grad_norm: Maximum L2 norm clipping value. Defaults to `40.0`. nonlin: Non-linearity. Defaults to `None`. initializer: Weight initializer. Defaults to `tf.random_normal_initializer(stddev=0.1)`. optimizer: Optimizer algorithm used for SGD. Defaults to `tf.train.AdamOptimizer(learning_rate=1e-2)`. encoding: A function returning a 2D Tensor (sentence_size, embedding_size). Defaults to `position_encoding`. session: Tensorflow Session the model is run with. Defaults to `tf.Session()`. name: Name of the End-To-End Memory Network. Defaults to `MemN2N`. """ self._batch_size = batch_size self._vocab_size = vocab_size self._sentence_size = sentence_size self._memory_size = memory_size self._embedding_size = embedding_size self._hops = hops self._max_grad_norm = max_grad_norm self._nonlin = nonlin self._init = initializer self._opt = tf.train.AdamOptimizer(learning_rate=lr, epsilon=epsilon) self._name = name self._l2 = l2 self._build_inputs() self._build_vars() self._encoding = tf.constant(encoding(self._sentence_size, self._embedding_size), name="encoding") # cross entropy logits = self._inference(self._stories, self._queries) # (batch_size, vocab_size) cross_entropy = tf.nn.softmax_cross_entropy_with_logits_v2( logits=logits, labels=tf.cast(self._answers, tf.float32), name="cross_entropy") cross_entropy_sum = tf.reduce_sum(cross_entropy, name="cross_entropy_sum") # loss op reg_loss = self._l2 * tf.add_n(tf.get_collection('reg_loss')) loss_op = cross_entropy_sum + reg_loss loss_op_summary = tf.summary.scalar("loss", loss_op) ema = tf.train.ExponentialMovingAverage(decay=0.99) self.update_loss_ema = ema.apply([loss_op]) loss_ema = ema.average(loss_op) self.loss_ema_op = tf.summary.scalar('batch_loss_ema', loss_ema) # gradient pipeline grads_and_vars = self._opt.compute_gradients(loss_op) grads_and_vars = [(tf.clip_by_norm(g, self._max_grad_norm), v) for g, v in grads_and_vars] grads_and_vars = [(add_gradient_noise(g), v) for g, v in grads_and_vars] nil_grads_and_vars = [] for g, v in grads_and_vars: if v.name in self._nil_vars: nil_grads_and_vars.append((zero_nil_slot(g), v)) else: nil_grads_and_vars.append((g, v)) train_op = self._opt.apply_gradients(nil_grads_and_vars, name="train_op") # predict ops predict_op = tf.argmax(logits, 1, name="predict_op") predict_proba_op = tf.nn.softmax(logits, name="predict_proba_op") predict_log_proba_op = tf.log(predict_proba_op, 
name="predict_log_proba_op") # validation accuracy ops self.val_acc_op = self._get_val_acc(predict_op, self._val_answers) self.val_acc_summary = tf.summary.scalar("val_acc", self.val_acc_op) # assign ops self.loss_op = loss_op self.predict_op = predict_op self.predict_proba_op = predict_proba_op self.predict_log_proba_op = predict_log_proba_op self.train_op = train_op self.loss_op_summary = loss_op_summary # Summaries self.merged = tf.summary.merge_all() self._sess = session if restoreLoc is not None: saver = tf.train.Saver() saver.restore(self._sess, restoreLoc) else: init_op = tf.initialize_all_variables() self._sess.run(init_op)
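# add_gradient_noise and zero_nil_slot are used above but not defined in this
# excerpt; a minimal sketch consistent with how they are called (the noise
# stddev is an assumed default):
import tensorflow as tf

def add_gradient_noise(t, stddev=1e-3, name=None):
    # add Gaussian noise to a gradient tensor, which can help training
    t = tf.convert_to_tensor(t, name="t")
    gn = tf.random_normal(tf.shape(t), stddev=stddev)
    return tf.add(t, gn, name=name)

def zero_nil_slot(t, name=None):
    # zero out the gradient row for the nil (padding) word so its embedding
    # stays fixed at zero
    t = tf.convert_to_tensor(t, name="t")
    s = tf.shape(t)[1]
    z = tf.zeros(tf.stack([1, s]))
    return tf.concat([z, tf.slice(t, [1, 0], [-1, -1])], 0, name=name)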
def train():
    colorlog.basicConfig(
        filename=None,
        level=logging.INFO,
        format="%(log_color)s[%(levelname)s:%(asctime)s]%(reset)s %(message)s",
        datefmt="%Y-%m-%d %H:%M:%S")
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.95)
    with tf.Session(config=tf.ConfigProto(allow_soft_placement=True,
                                          log_device_placement=False,
                                          gpu_options=gpu_options)) as sess:
        global_step = tf.get_variable('global_step', [],
                                      initializer=tf.constant_initializer(0),
                                      trainable=False)
        num_examples_per_epoch, tower_img_embedding, tower_context_length, \
            tower_caption_length, tower_context_id, tower_caption_id, \
            tower_answer_id, tower_context_mask, \
            tower_caption_mask = enqueue(False)

        # Calculate the learning rate schedule.
        num_batches_per_epoch = (num_examples_per_epoch / FLAGS.batch_size /
                                 FLAGS.num_gpus)
        decay_steps = int(num_batches_per_epoch * NUM_EPOCHS_PER_DECAY)

        # Decay the learning rate exponentially based on the number of steps.
        lr = tf.train.exponential_decay(FLAGS.init_lr,
                                        global_step,
                                        decay_steps,
                                        LEARNING_RATE_DECAY_FACTOR,
                                        staircase=True)

        # Create an optimizer that performs gradient descent.
        opt = tf.train.AdamOptimizer(lr)

        # Calculate the gradients for each model tower.
        tower_grads = []
        with tf.variable_scope(tf.get_variable_scope()) as scope:
            for i in xrange(FLAGS.num_gpus):
                with tf.device('/gpu:%d' % i):
                    with tf.name_scope('%s_%d' % (TOWER_NAME, i)) as scope:
                        # Calculate the loss for one tower of the CIFAR model.
                        # This function constructs the entire CIFAR model but
                        # shares the variables across all towers.
                        inputs = [
                            tower_img_embedding[i], tower_context_length[i],
                            tower_caption_length[i], tower_context_id[i],
                            tower_caption_id[i], tower_answer_id[i],
                            tower_context_mask[i], tower_caption_mask[i]
                        ]
                        loss = _tower_loss(inputs, scope)

                        # Reuse variables for the next tower.
                        tf.get_variable_scope().reuse_variables()

                        # Retain the summaries from the final tower.
                        summaries = tf.get_collection(tf.GraphKeys.SUMMARIES,
                                                      scope)

                        # Calculate the gradients for the batch of data on
                        # this CIFAR tower.
                        grads = opt.compute_gradients(loss)

                        # Keep track of the gradients across all towers.
                        tower_grads.append(grads)

        # We must calculate the mean of each gradient. Note that this is the
        # synchronization point across all towers.
        grads = _average_gradients(tower_grads)

        # Add a summary to track the learning rate.
        summaries.append(tf.summary.scalar('learning_rate', lr))
        clipped_grads_and_vars = [(tf.clip_by_norm(gv[0], FLAGS.max_grad_norm),
                                   gv[1]) for gv in grads]

        # Apply the gradients to adjust the shared variables.
        apply_gradient_op = opt.apply_gradients(clipped_grads_and_vars,
                                                global_step=global_step)

        # Create a saver.
        saver = tf.train.Saver(tf.global_variables(), max_to_keep=200)

        # Build the summary operation from the last tower summaries.
        summary_op = tf.summary.merge(summaries)

        # Build an initialization operation to run below.
        init = tf.global_variables_initializer()
        sess.run(init)
        ckpt = tf.train.get_checkpoint_state(FLAGS.train_dir)
        if ckpt and ckpt.model_checkpoint_path:
            # Restores from checkpoint
            saver.restore(sess, ckpt.model_checkpoint_path)

        # Start the queue runners.
tf.train.start_queue_runners(sess=sess) summary_writer = tf.summary.FileWriter(FLAGS.train_dir, sess.graph) for step in xrange(FLAGS.max_steps): start_time = time.time() _, loss_value = sess.run([apply_gradient_op, loss]) duration = time.time() - start_time assert not np.isnan(loss_value), 'Model diverged with loss = NaN' if (step + 1) % 10 == 0: num_examples_per_step = FLAGS.batch_size * FLAGS.num_gpus examples_per_sec = num_examples_per_step / duration sec_per_batch = duration / FLAGS.num_gpus format_str = ( '%s: step %d, loss = %.2f (%.1f examples/sec; %.3f ' 'sec/batch)') c_g_step = int(global_step.eval(session=sess)) print(format_str % (datetime.now(), c_g_step, loss_value, examples_per_sec, sec_per_batch)) if (step + 1) % 25 == 0: summary_str = sess.run(summary_op) summary_writer.add_summary(summary_str, c_g_step) # Save the model checkpoint periodically. if (step + 1) % 500 == 0 or (step + 1) == FLAGS.max_steps: checkpoint_path = os.path.join(FLAGS.train_dir, 'model.ckpt') saver.save(sess, checkpoint_path, global_step=c_g_step)
def build_train(make_obs_ph,
                q_func,
                num_actions,
                optimizer_f,
                grad_norm_clipping=None,
                gamma=1.0,
                scope="setdeepq",
                reuse=None,
                test_eps=0.05,
                lr_init=0.001,
                lr_period_steps=250000,
                tau=0.05):
    """Creates the train function.

    Parameters
    ----------
    make_obs_ph: str -> tf.placeholder or TfInput
        a function that takes a name and creates a placeholder of input with that name
    q_func: (tf.Variable, int, str, bool) -> tf.Variable
        the model that takes the following inputs:
            observation_in: object
                the output of observation placeholder
            num_actions: int
                number of actions
            scope: str
            reuse: bool
                should be passed to outer variable scope
        and returns a tensor of shape (batch_size, num_actions) with values of every action.
    num_actions: int
        number of actions
    optimizer_f: learning_rate -> tf.train.Optimizer
        a function that builds the optimizer for the Q-learning objective
    grad_norm_clipping: float or None
        clip gradient norms to this value. If None no clipping is performed.
    gamma: float
        discount rate.
    scope: str or VariableScope
        optional scope for variable_scope.
    reuse: bool or None
        whether or not the variables should be reused. To be able to reuse the scope must be given.
    test_eps: float
        epsilon used by the greedy evaluation policy
    lr_init: float
        initial learning rate
    lr_period_steps: int
        learning rate schedule following a cosine with this period
    tau: float
        parameter for the soft target network update. tau <= 1.0, with 1.0 giving the hard update.

    Returns
    -------
    act: (tf.Variable, bool, float) -> tf.Variable
        function to select an action given an observation. See the top of the file for details.
    act_greedy: (tf.Variable, bool, float) -> tf.Variable
        epsilon-greedy evaluation policy with epsilon = test_eps.
    q_values: object -> np.array
        function returning the Q-values of both online networks for an observation.
    train: (object, np.array, np.array, object, np.array, np.array) -> np.array
        optimize the error in Bellman's equation. See the top of the file for details.
    update_target: () -> ()
        soft-update the parameters of the target Q functions toward the optimized ones. See the top of the file for details.
    debug: {str: function}
        a bunch of functions to print debug data like q_values.
""" # Build action graphs act_f = build_act(make_obs_ph, q_func, num_actions, scope=scope, reuse=reuse) act_greedy = build_act_greedy(make_obs_ph, q_func, num_actions, scope=scope, reuse=True, eps=test_eps) with tf.compat.v1.variable_scope(scope, reuse=reuse): # set up placeholders obs_t_input = make_obs_ph("obs_t") act_t_ph = tf.compat.v1.placeholder(tf.int32, [None], name="action") rew_t_ph = tf.compat.v1.placeholder(tf.float32, [None], name="reward") obs_tp1_input = make_obs_ph("obs_tp1") done_mask_ph = tf.compat.v1.placeholder(tf.float32, [None], name="done") importance_weights_ph = tf.compat.v1.placeholder(tf.float32, [None], name="weight") iteration = tf.compat.v1.placeholder(tf.float32, name="iteration") # Cosine learning rate adjustment lr = tf.Variable(float(lr_init), trainable=False, dtype=tf.float32, name='lr') lr = tf.clip_by_value( 0.0005 * tf.math.cos(math.pi * iteration / lr_period_steps) + 0.000501, 1e-6, 1e-3) optimizer = optimizer_f(learning_rate=lr) # q network evaluation q1_t = q_func.forward(obs_t_input.get(), num_actions, scope="q1_func", reuse=True) # reuse q1 parameters from act q1_func_vars = tf.compat.v1.get_collection( tf.compat.v1.GraphKeys.TRAINABLE_VARIABLES, scope=tf.compat.v1.get_variable_scope().name + "/q1_func") q2_t = q_func.forward(obs_t_input.get(), num_actions, scope="q2_func", reuse=True) # reuse q2 parameters from act q2_func_vars = tf.compat.v1.get_collection( tf.compat.v1.GraphKeys.TRAINABLE_VARIABLES, scope=tf.compat.v1.get_variable_scope().name + "/q2_func") # target q network evalution q1_tp1 = q_func.forward(obs_tp1_input.get(), num_actions, scope="target_q1_func", reuse=False) target_q1_func_vars = tf.compat.v1.get_collection( tf.compat.v1.GraphKeys.TRAINABLE_VARIABLES, scope=tf.compat.v1.get_variable_scope().name + "/target_q1_func") q2_tp1 = q_func.forward(obs_tp1_input.get(), num_actions, scope="target_q2_func", reuse=False) target_q2_func_vars = tf.compat.v1.get_collection( tf.compat.v1.GraphKeys.TRAINABLE_VARIABLES, scope=tf.compat.v1.get_variable_scope().name + "/target_q2_func") # q scores for actions which we know were selected in the given state. q1_t_selected = tf.reduce_sum(input_tensor=q1_t * tf.one_hot(act_t_ph, num_actions), axis=1) q2_t_selected = tf.reduce_sum(input_tensor=q2_t * tf.one_hot(act_t_ph, num_actions), axis=1) # Actions selected with current q funcs at state t+1. 
q1_tp1_using_online_net = q_func.forward(obs_tp1_input.get(), num_actions, scope="q1_func", reuse=True) q2_tp1_using_online_net = q_func.forward(obs_tp1_input.get(), num_actions, scope="q2_func", reuse=True) tp1_best_action_using_online_net = tf.argmax( input=q1_tp1_using_online_net + q2_tp1_using_online_net, axis=1) # Using action at t+1 find target value associated with the action q1_tp1_selected = tf.reduce_sum( input_tensor=q1_tp1 * tf.one_hot(tp1_best_action_using_online_net, num_actions), axis=1) q2_tp1_selected = tf.reduce_sum( input_tensor=q2_tp1 * tf.one_hot(tp1_best_action_using_online_net, num_actions), axis=1) # Min of target q values to be used bellman equation q_tp1_best = tf.minimum(q1_tp1_selected, q2_tp1_selected) # compute RHS of bellman equation q_tp1_selected_target = rew_t_ph + gamma * q_tp1_best # compute the error (potentially clipped) td_error1 = q1_t_selected - tf.stop_gradient(q_tp1_selected_target) td_error2 = q2_t_selected - tf.stop_gradient(q_tp1_selected_target) errors1 = U.huber_loss(td_error1) errors2 = U.huber_loss(td_error2) errors = errors1 + errors2 weighted_error = tf.reduce_mean(input_tensor=importance_weights_ph * errors) #Print total number of params total_parameters = 0 for variable in tf.compat.v1.trainable_variables(): # shape is an array of tf.Dimension shape = variable.get_shape() variable_parameters = 1 for dim in shape: variable_parameters *= dim.value # print("var params", variable_parameters) total_parameters += variable_parameters print( "===============================================================") print("Total number of trainable params:", total_parameters) print( "===============================================================") # Log for tensorboard tf.summary.scalar('q1_values', tf.math.reduce_mean(q1_t)) tf.summary.scalar('q2_values', tf.math.reduce_mean(q2_t)) tf.summary.scalar('td_1', tf.math.reduce_mean(td_error1)) tf.summary.scalar('td_2', tf.math.reduce_mean(td_error2)) tf.summary.scalar('weighted_loss', weighted_error) tf.summary.scalar('lr_schedule', lr) tf.summary.scalar('td_MSE_1', tf.math.reduce_mean(tf.math.square(td_error1))) tf.summary.scalar('td_MSE_2', tf.math.reduce_mean(tf.math.square(td_error2))) # combine variable scopes q_func_vars = q1_func_vars + q2_func_vars # compute optimization op (potentially with gradient clipping) if grad_norm_clipping is not None: gradients = optimizer.compute_gradients(weighted_error, var_list=q_func_vars) for i, (grad, var) in enumerate(gradients): if grad is not None: gradients[i] = (tf.clip_by_norm(grad, grad_norm_clipping), var) optimize_expr = optimizer.apply_gradients(gradients) else: optimize_expr = optimizer.minimize(weighted_error, var_list=q_func_vars) # update_target_fn will be called every step to copy Q network to target Q network # target network is updated with polyak averaging update_target_expr1 = [] for var, var_target in zip( sorted(q1_func_vars, key=lambda v: v.name), sorted(target_q1_func_vars, key=lambda v: v.name)): update_target_expr1.append( var_target.assign(tau * var + (1 - tau) * var_target)) update_target_expr1 = tf.group(*update_target_expr1) update_target_expr2 = [] for var, var_target in zip( sorted(q2_func_vars, key=lambda v: v.name), sorted(target_q2_func_vars, key=lambda v: v.name)): update_target_expr2.append( var_target.assign(tau * var + (1 - tau) * var_target)) update_target_expr2 = tf.group(*update_target_expr2) merged_summary = tf.compat.v1.summary.merge_all( scope=tf.compat.v1.get_variable_scope().name) # Create callable functions train = 
U.function(inputs=[ obs_t_input, act_t_ph, rew_t_ph, obs_tp1_input, done_mask_ph, importance_weights_ph, iteration ], outputs=[ td_error1, td_error2, tf.reduce_mean(input_tensor=errors), merged_summary ], updates=[optimize_expr, lr]) update_target = U.function( [], [], updates=[update_target_expr1, update_target_expr2]) q_values = U.function(inputs=[obs_t_input], outputs=[q1_t, q2_t]) return act_f, act_greedy, q_values, train, update_target, { 'q_values': q_values }
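# Hedged usage sketch for the callables returned above (argument values are
# placeholders, not from the source):
#   td1, td2, mean_err, summary = train(obs_t, actions, rewards, obs_tp1,
#                                       dones, weights, iteration)
#   update_target()  # Polyak soft update of both target networks with rate tau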
def grad_clip_fn(self, opt, loss, tvars, **kargs):
    gpu_count = self.config.get('gpu_count', 1)
    grad_name = kargs.get('grad_name', "grad_norm")
    grad_ratio = kargs.get('grad_ratio', {})
    if self.config.get("opt_type", "pai_soar") == "pai_soar":
        loss_fn = opt.compute_loss(loss,
                                   loss_scale=self.config.get("loss_scale", 1))
        grads_and_vars = opt.compute_gradients(loss_fn,
                                               colocate_gradients_with_ops=True)
    else:
        grads_and_vars = opt.compute_gradients(loss, tvars)

    valid_vars = []
    for grad, var in grads_and_vars:
        if grad is not None:
            valid_vars.append(var)
        else:
            print(var.name, "=====none grad======", grad_name)
    grads = [grad for grad, _ in grads_and_vars if grad is not None]

    grad_clip = self.config.get("grad_clip", "global_norm")
    use_norm = tf.global_norm(grads)
    tf.summary.scalar(grad_name + '/total_grad_norm', use_norm)
    for grad, var in grads_and_vars:
        if grad is not None:
            var_grad_norm = tf.global_norm([grad])
            tf.summary.scalar(grad_name + "/" + var.name, var_grad_norm)

    tf.logging.info(" gradient clip method {}".format(grad_clip))
    if grad_clip == "global_norm":
        clip_norm = self.config.get("clip_norm", 1.0)
        if self.config.get("strategy", "") in [
                'MirroredStrategy', 'CollectiveAllReduceStrategy'
        ]:
            # rescale the reference norm for the number of replicas
            use_norm = tf.global_norm(grads)
            [scale_grads, _] = tf.clip_by_global_norm(
                grads,
                clip_norm=clip_norm,
                use_norm=use_norm * tf.sqrt(gpu_count * 1.0))
            tf.summary.scalar(grad_name + '/grad_scale',
                              use_norm * tf.sqrt(gpu_count * 1.0))
        else:
            [scale_grads, _] = tf.clip_by_global_norm(grads,
                                                      clip_norm=clip_norm)
    elif grad_clip == "norm":
        clip_norm = self.config.get("clip_norm", 1.0)
        scale_grads = [tf.clip_by_norm(grad, clip_norm) for grad in grads]
    elif grad_clip == "value":
        clip_min_value = self.config.get("clip_min_value", -1.0)
        clip_max_value = self.config.get("clip_max_value", 1.0)
        scale_grads = [
            tf.clip_by_value(grad, clip_min_value, clip_max_value)
            for grad in grads
        ]
    else:
        scale_grads = grads

    grads_and_vars = zip(scale_grads, valid_vars)
    return grads_and_vars
def build_graph(self, graph, embedding_array, Config): """ :param graph: :param embedding_array: :param Config: :return: """ with graph.as_default(): self.embeddings = tf.Variable(embedding_array, dtype=tf.float32) """ =================================================================== Define the computational graph with necessary variables. """ self.train_inputs = tf.placeholder(tf.int32, shape=[constants.batch_size, constants.n_Tokens]) self.train_labels = tf.placeholder(tf.int32, shape=[constants.batch_size, parsing_system.numTransitions()]) train_embedding_lookup = tf.nn.embedding_lookup(self.embeddings, self.train_inputs) train_embed = tf.reshape(train_embedding_lookup, [constants.batch_size, -1]) # Masking out invalid -1 transitions in train_labels #train_labels = tf.nn.relu(self.train_labels) #weights_input = tf.Variable(tf.truncated_normal(shape=[constants.hidden_size, constants.embedding_size * constants.n_Tokens], stddev=1/math.sqrt(constants.embedding_size * constants.n_Tokens)) '''biases_input = tf.Variable(tf.zeros([constants.hidden_size,1])) weights_output = tf.Variable(tf.truncated_normal(shape=[parsing_system.numTransitions(), constants.hidden_size], stddev= 1/ math.sqrt((constants.hidden_size)))) #self.predictions = self.forward_pass_parallel(train_embed, weights_words, weights_tags, weights_labels, biases_words, biases_tags, biases_labels, weights_output)''' '''train_labels = tf.nn.relu(self.train_labels) self.predictions = self.forward_pass(train_embed, weights_input, biases_input, weights_output) self.loss = tf.nn.softmax_cross_entropy_with_logits_v2(logits=self.predictions, labels=train_labels) thetas = tf.nn.l2_loss(train_embed) + tf.nn.l2_loss(weights_input) + tf.nn.l2_loss(biases_input) + tf.nn.l2_loss(weights_output) self.loss = tf.reduce_mean(self.loss + constants.lam * thetas)''' ######################################## Remove these comment for 2 hidden layer implementation '''weights_input = tf.Variable(tf.truncated_normal(shape=[constants.hidden_size, constants.embedding_size * constants.n_Tokens], stddev=1/math.sqrt(constants.embedding_size * constants.n_Tokens))) biases_input = tf.Variable(tf.zeros(shape = [constants.hidden_size, 1])) weights2 = tf.Variable(tf.truncated_normal(shape=[constants.hidden2_size, constants.hidden_size], stddev= 1/ math.sqrt((constants.hidden_size)))) biases2 = tf.Variable(tf.zeros(shape = [constants.hidden2_size, 1])) weights_output = tf.Variable(tf.truncated_normal(shape=[parsing_system.numTransitions(), constants.hidden2_size], stddev=1 / math.sqrt(constants.hidden2_size))) self.predictions = self.forward_pass_2_hidden(train_embed, weights_input, biases_input,weights2, biases2, weights_output) train_labels = tf.nn.relu(self.train_labels) self.loss = tf.nn.softmax_cross_entropy_with_logits_v2(logits=self.predictions, labels=train_labels) thetas2 = tf.nn.l2_loss(train_embed) + tf.nn.l2_loss(weights_input) + tf.nn.l2_loss(biases_input) + tf.nn.l2_loss(weights2) + tf.nn.l2_loss(biases2) + tf.nn.l2_loss(weights_output) self.loss = tf.reduce_mean(self.loss + constants.lam * thetas2)''' ################### Alternate2 hidden layer '''weights_input = tf.Variable(tf.truncated_normal(shape=[constants.hidden_size, constants.embedding_size * constants.n_Tokens], stddev=0.1)) biases_input = tf.Variable(tf.random_normal(stddev= 0.1, shape = [constants.hidden_size])) weights2 = tf.Variable(tf.truncated_normal(shape=[constants.hidden2_size, constants.hidden_size], stddev=0.1)) biases2 = tf.Variable(tf.random_normal(stddev= 0.1, shape = 
[constants.hidden2_size])) weights_output = tf.Variable(tf.truncated_normal(shape=[parsing_system.numTransitions(), constants.hidden2_size], stddev=0.1)) self.predictions = self.forward_pass_2_hidden_alt(train_embed, weights_input, biases_input,weights2, biases2, weights_output) print self.predictions #self.predictions = tf.Print(self.predictions, [self.predictions]) train_labels = tf.nn.relu(self.train_labels) self.loss = tf.nn.softmax_cross_entropy_with_logits_v2(logits=self.predictions, labels=train_labels) thetas2 = tf.nn.l2_loss(train_embed) + tf.nn.l2_loss(weights_input) + tf.nn.l2_loss(biases_input) + tf.nn.l2_loss(weights2) + tf.nn.l2_loss(biases2) + tf.nn.l2_loss(weights_output) self.loss = tf.reduce_mean(self.loss + constants.lam * thetas2)''' ############################# Remove the comments for 3 hidden layer implementation ################################# '''weights_input = tf.Variable(tf.truncated_normal(shape=[constants.hidden_size, constants.embedding_size * constants.n_Tokens], stddev=1/math.sqrt(constants.embedding_size * constants.n_Tokens))) biases_input = tf.Variable(tf.zeros(shape = [constants.hidden_size, 1])) weights2 = tf.Variable(tf.truncated_normal(shape=[constants.hidden2_size, constants.hidden_size], stddev= 1/ math.sqrt((constants.hidden_size)))) biases2 = tf.Variable(tf.zeros(shape = [constants.hidden2_size, 1])) weights3 = tf.Variable(tf.truncated_normal(shape=[constants.hidden3_size, constants.hidden2_size], stddev= 1/ math.sqrt((constants.hidden2_size)))) biases3 = tf.Variable(tf.zeros([constants.hidden3_size, 1])) weights_output = tf.Variable(tf.truncated_normal(shape=[parsing_system.numTransitions(), constants.hidden3_size], stddev= 1/ math.sqrt((constants.hidden3_size)))) self.predictions = self.forward_pass_3_hidden(train_embed, weights_input, biases_input,weights2, biases2, weights3, biases3, weights_output) train_labels = tf.nn.relu(self.train_labels) self.loss = tf.nn.softmax_cross_entropy_with_logits_v2(logits=self.predictions, labels=train_labels) thetas3 = tf.nn.l2_loss(train_embed) + tf.nn.l2_loss(weights_input) + tf.nn.l2_loss(biases_input) + tf.nn.l2_loss(weights2) + tf.nn.l2_loss(biases2) + tf.nn.l2_loss(weights3) + tf.nn.l2_loss(biases3) + tf.nn.l2_loss(weights_output) self.loss = tf.reduce_mean(self.loss + constants.lam * thetas3)''' ##################### Use below commented code for 3 parralel layers for words, tags and labels ############### train_embed_words = tf.slice(train_embedding_lookup, [0, 0, 0], [constants.batch_size, constants.n_Tokens_word, constants.embedding_size]) train_embed_words = tf.reshape(train_embed_words, [constants.batch_size, -1]) train_embed_pos = tf.slice(train_embedding_lookup, [0, 18, 0], [constants.batch_size, constants.n_Tokens_pos, constants.embedding_size]) train_embed_pos = tf.reshape(train_embed_pos, [constants.batch_size, -1]) train_embed_labels = tf.slice(train_embedding_lookup, [0, 36, 0], [constants.batch_size, constants.n_Tokens_labels, constants.embedding_size]) train_embed_labels = tf.reshape(train_embed_labels, [constants.batch_size, -1]) weights_output_words = tf.Variable(tf.random_normal(shape=[parsing_system.numTransitions(), constants.hidden_size], stddev=1.0/math.sqrt(constants.hidden_size))) weights_output_pos = tf.Variable(tf.random_normal(shape=[parsing_system.numTransitions(), constants.hidden_size], stddev=1.0/math.sqrt(constants.hidden_size))) weights_output_labels = tf.Variable(tf.random_normal(shape=[parsing_system.numTransitions(), constants.hidden_size], 
stddev=1.0/math.sqrt(constants.hidden_size)))

            weights_input_words = tf.Variable(tf.truncated_normal(shape=[constants.hidden_size, constants.n_Tokens_word * constants.embedding_size], stddev=0.1))
            biases_input_words = tf.Variable(tf.zeros([constants.hidden_size, 1]))
            weights_input_pos = tf.Variable(tf.truncated_normal(shape=[constants.hidden_size, constants.n_Tokens_pos * constants.embedding_size], stddev=0.1))
            biases_input_pos = tf.Variable(tf.zeros([constants.hidden_size, 1]))
            weights_input_labels = tf.Variable(tf.truncated_normal(shape=[constants.hidden_size, constants.n_Tokens_labels * constants.embedding_size], stddev=0.1))
            biases_input_labels = tf.Variable(tf.zeros([constants.hidden_size, 1]))

            self.prediction_words = self.forward_pass(train_embed_words, weights_input_words, biases_input_words, weights_output_words)
            self.prediction_pos = self.forward_pass(train_embed_pos, weights_input_pos, biases_input_pos, weights_output_pos)
            self.prediction_labels = self.forward_pass(train_embed_labels, weights_input_labels, biases_input_labels, weights_output_labels)

            train_labels = tf.nn.relu(self.train_labels)
            loss_words = tf.nn.softmax_cross_entropy_with_logits_v2(logits=self.prediction_words, labels=train_labels)
            loss_pos = tf.nn.softmax_cross_entropy_with_logits_v2(logits=self.prediction_pos, labels=train_labels)
            loss_labels = tf.nn.softmax_cross_entropy_with_logits_v2(logits=self.prediction_labels, labels=train_labels)

            l2_input_words = constants.lam * tf.nn.l2_loss(weights_input_words)
            l2_biases_words = constants.lam * tf.nn.l2_loss(biases_input_words)
            l2_input_pos = constants.lam * tf.nn.l2_loss(weights_input_pos)
            l2_biases_pos = constants.lam * tf.nn.l2_loss(biases_input_pos)
            l2_input_labels = constants.lam * tf.nn.l2_loss(weights_input_labels)
            l2_biases_labels = constants.lam * tf.nn.l2_loss(biases_input_labels)
            l2_output_words = constants.lam * tf.nn.l2_loss(weights_output_words)
            l2_output_pos = constants.lam * tf.nn.l2_loss(weights_output_pos)
            l2_output_labels = constants.lam * tf.nn.l2_loss(weights_output_labels)
            # regularize each branch's own embedding slice
            l2_embed_words = constants.lam * tf.nn.l2_loss(train_embed_words)
            l2_embed_pos = constants.lam * tf.nn.l2_loss(train_embed_pos)
            l2_embed_labels = constants.lam * tf.nn.l2_loss(train_embed_labels)

            l2_loss = (loss_words + l2_input_words + l2_biases_words + l2_output_words + l2_embed_words) + \
                      (loss_pos + l2_input_pos + l2_biases_pos + l2_output_pos + l2_embed_pos) + \
                      (loss_labels + l2_input_labels + l2_biases_labels + l2_output_labels + l2_embed_labels)
            #------------------------------------------------------------------------------------------------------------------#
            # Take average loss over the entire batch
            self.loss = tf.reduce_mean(l2_loss)

            ############## gradient descent computation with gradient clipping ##############
            optimizer = tf.train.GradientDescentOptimizer(constants.learning_rate)
            grads = optimizer.compute_gradients(self.loss)
            clipped_grads = [(tf.clip_by_norm(grad, 5), var) for grad, var in grads]
            self.app = optimizer.apply_gradients(clipped_grads)

            ################### Test Predictions #######################################
            self.test_inputs = tf.placeholder(tf.int32, shape=[constants.n_Tokens])
            test_embed_lookup = tf.nn.embedding_lookup(self.embeddings, self.test_inputs)  # (n_Tokens, embedding_size)
            test_embed = tf.reshape(test_embed_lookup, [1, -1])
            #self.test_pred = self.forward_pass(test_embed, weights_input, biases_input, weights_output)

            ############ Use below commented code to run for 2 hidden layers
##########
            #self.test_pred = self.forward_pass_2_hidden(test_embed, weights_input, biases_input, weights2, biases2, weights_output)

            ############ Use below commented code for 2 hidden alternate ################
            #self.test_pred = self.forward_pass_2_hidden_alt(test_embed, weights_input, biases_input, weights2, biases2, weights_output)

            ########### Use below commented code for 3 hidden layer implementation
            #self.test_pred = self.forward_pass_3_hidden(test_embed, weights_input, biases_input, weights2, biases2, weights3, biases3, weights_output)

            # Prediction for the test data: slice the 2-D lookup (one row per
            # token) into word/POS/label blocks before flattening each block
            test_embed_words = tf.slice(test_embed_lookup, [0, 0], [constants.n_Tokens_word, constants.embedding_size])
            test_embed_words = tf.reshape(test_embed_words, [1, -1])
            test_embed_pos = tf.slice(test_embed_lookup, [18, 0], [constants.n_Tokens_pos, constants.embedding_size])
            test_embed_pos = tf.reshape(test_embed_pos, [1, -1])
            test_embed_labels = tf.slice(test_embed_lookup, [36, 0], [constants.n_Tokens_labels, constants.embedding_size])
            test_embed_labels = tf.reshape(test_embed_labels, [1, -1])

            test_pred_words = self.forward_pass(test_embed_words, weights_input_words, biases_input_words, weights_output_words)
            test_pred_pos = self.forward_pass(test_embed_pos, weights_input_pos, biases_input_pos, weights_output_pos)
            test_pred_labels = self.forward_pass(test_embed_labels, weights_input_labels, biases_input_labels, weights_output_labels)
            self.test_pred = (test_pred_words + test_pred_pos + test_pred_labels) / 3

            # initializer
            self.init = tf.global_variables_initializer()
def _model_build(self):
    with tf.variable_scope(self.scope):
        # Inputs are 4 stacked 84x84 image frames
        self.X = tf.placeholder(shape=[None, 84, 84, 4], dtype=tf.uint8, name="X")
        self.y = tf.placeholder(shape=[None], dtype=tf.float32, name="y")
        self.isTraining = tf.placeholder(dtype=tf.bool, name="isTraining")
        self.actions = tf.placeholder(shape=[None], dtype=tf.int32, name="actions")

        X = tf.to_float(self.X) / 255.0
        batch_size = tf.shape(self.X)[0]

        # CNN with batch norm
        conv1 = tf.contrib.layers.conv2d(X, 32, 8, 4, activation_fn=None)
        conv1_bn = tf.contrib.layers.batch_norm(conv1, center=True, scale=True, is_training=self.isTraining)
        h1 = tf.nn.relu(conv1_bn, 'relu')
        conv2 = tf.contrib.layers.conv2d(h1, 64, 4, 2, activation_fn=None)
        conv2_bn = tf.contrib.layers.batch_norm(conv2, center=True, scale=True, is_training=self.isTraining)
        h2 = tf.nn.relu(conv2_bn, 'relu')
        conv3 = tf.contrib.layers.conv2d(h2, 64, 3, 1, activation_fn=None)
        conv3_bn = tf.contrib.layers.batch_norm(conv3, center=True, scale=True, is_training=self.isTraining)
        h3 = tf.nn.relu(conv3_bn, 'relu')

        # Fully connected layers
        flattened = tf.contrib.layers.flatten(h3)
        fc1 = tf.contrib.layers.fully_connected(flattened, 512, activation_fn=None)
        fc1_bn = tf.contrib.layers.batch_norm(fc1, center=True, scale=True, is_training=self.isTraining)
        fc_act = tf.nn.relu(fc1_bn, 'relu')
        self.predictions = tf.contrib.layers.fully_connected(fc_act, N_ACTION)

        # Original layers without batch norm, kept for reference:
        # conv1 = tf.contrib.layers.conv2d(X, 32, 8, 4, activation_fn=tf.nn.relu)
        # conv2 = tf.contrib.layers.conv2d(conv1, 64, 4, 2, activation_fn=tf.nn.relu)
        # conv3 = tf.contrib.layers.conv2d(conv2, 64, 3, 1, activation_fn=tf.nn.relu)
        # flattened = tf.contrib.layers.flatten(conv3)
        # fc1 = tf.contrib.layers.fully_connected(flattened, 512)
        # self.predictions = tf.contrib.layers.fully_connected(fc1, N_ACTION)

        # Get the predictions for the chosen actions only.
        # `predictions` is (batch_size, N_ACTION); flattened row-major, the
        # Q-value for action a in row i sits at index i * N_ACTION + a.
        # Example with N_ACTION = 4: action 2 in row 2 (0-indexed) maps to
        # 2 * 4 + 2 = 10 in the flattened array [0,1,2,3, 0,1,2,3, 0,1,2,3].
        gather_indices = tf.range(batch_size) * tf.shape(self.predictions)[1] + self.actions
        self.action_predictions = tf.gather(tf.reshape(self.predictions, [-1]), gather_indices)

        # Calculate the loss and build the clipped-gradient training op
        self.losses = tf.squared_difference(self.y, self.action_predictions)
        self.loss = tf.reduce_mean(self.losses)
        self.optimizer = tf.train.AdamOptimizer(LEARNING_RATE)
        gradients, variables = zip(*self.optimizer.compute_gradients(self.loss))
        gradients = [None if gradient is None else tf.clip_by_norm(gradient, GRADIENT_CLIPPING_NORM)
                     for gradient in gradients]
        self.train_op = self.optimizer.apply_gradients(zip(gradients, variables))
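# Hedged numpy sketch (all values made up) verifying the flat-indexing trick
# used in _model_build above: with 3 rows and 4 actions, the Q-value for
# action a in row i lives at i * 4 + a after reshaping to one dimension.
import numpy as np
preds = np.arange(12).reshape(3, 4)      # fake (batch, action) Q-values
actions = np.array([1, 3, 0])            # one chosen action per row
gather_indices = np.arange(3) * preds.shape[1] + actions
assert np.array_equal(preds.reshape(-1)[gather_indices], preds[np.arange(3), actions])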
def create_variables(self):
    with tf.name_scope("model_inputs"):
        # raw state representation
        self.states = tf.placeholder(tf.float32, (None, self.state_dim), name="states")

    # rollout action based on current policy
    with tf.name_scope("predict_actions"):
        # initialize actor-critic network
        with tf.variable_scope("actor_network"):
            self.policy_outputs = self.actor_network(self.states)
        with tf.variable_scope("critic_network"):
            self.value_outputs = self.critic_network(self.states)

        # predict actions from policy network
        self.action_scores = tf.identity(self.policy_outputs, name="action_scores")
        # Note 1: tf.multinomial is not good enough to use yet,
        # so we don't use self.predicted_actions for now
        self.predicted_actions = tf.multinomial(self.action_scores, 1)

    # get variable lists
    actor_network_variables = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope="actor_network")
    critic_network_variables = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope="critic_network")

    # compute loss and gradients
    with tf.name_scope("compute_pg_gradients"):
        # gradients for selecting action from policy network
        self.taken_actions = tf.placeholder(tf.int32, (None,), name="taken_actions")
        self.discounted_rewards = tf.placeholder(tf.float32, (None,), name="discounted_rewards")

        with tf.variable_scope("actor_network", reuse=True):
            self.logprobs = self.actor_network(self.states)
        with tf.variable_scope("critic_network", reuse=True):
            self.estimated_values = self.critic_network(self.states)

        # compute policy loss and regularization loss
        self.cross_entropy_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=self.logprobs, labels=self.taken_actions)
        self.pg_loss = tf.reduce_mean(self.cross_entropy_loss)
        self.actor_reg_loss = tf.reduce_sum(
            [tf.reduce_sum(tf.square(x)) for x in actor_network_variables])
        self.actor_loss = self.pg_loss + self.reg_param * self.actor_reg_loss

        # compute actor gradients
        self.actor_gradients = self.optimizer.compute_gradients(self.actor_loss, actor_network_variables)

        # compute advantages A(s) = R - V(s)
        self.advantages = tf.reduce_sum(self.discounted_rewards - self.estimated_values)
        # scale policy gradients by the advantages
        for i, (grad, var) in enumerate(self.actor_gradients):
            if grad is not None:
                self.actor_gradients[i] = (grad * self.advantages, var)

        # compute critic gradients
        self.mean_square_loss = tf.reduce_mean(tf.square(self.discounted_rewards - self.estimated_values))
        self.critic_reg_loss = tf.reduce_sum(
            [tf.reduce_sum(tf.square(x)) for x in critic_network_variables])
        self.critic_loss = self.mean_square_loss + self.reg_param * self.critic_reg_loss
        self.critic_gradients = self.optimizer.compute_gradients(self.critic_loss, critic_network_variables)

        # collect all gradients and clip each by norm
        self.gradients = self.actor_gradients + self.critic_gradients
        for i, (grad, var) in enumerate(self.gradients):
            if grad is not None:
                self.gradients[i] = (tf.clip_by_norm(grad, self.max_gradient), var)

        # summarize gradients (tf.summary.* replaces the long-removed
        # tf.histogram_summary / tf.scalar_summary API)
        for grad, var in self.gradients:
            tf.summary.histogram(var.name, var)
            if grad is not None:
                tf.summary.histogram(var.name + '/gradients', grad)

        # emit summaries
        tf.summary.histogram("estimated_values", self.estimated_values)
        tf.summary.scalar("actor_loss", self.actor_loss)
        tf.summary.scalar("critic_loss", self.critic_loss)
        tf.summary.scalar("reg_loss", self.actor_reg_loss + self.critic_reg_loss)

    # training update
    with tf.name_scope("train_actor_critic"):
        # apply gradients to update the actor-critic networks
        self.train_op = self.optimizer.apply_gradients(self.gradients)

    self.summarize = tf.summary.merge_all()
    self.no_op = tf.no_op()
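# Hedged usage sketch for create_variables above; `agent`, `sess`, `writer`,
# `step` and the batch arrays are illustrative stand-ins, not part of the class.
feed = {
    agent.states: batch_states,                # (N, state_dim)
    agent.taken_actions: batch_actions,        # (N,)
    agent.discounted_rewards: batch_returns,   # (N,)
}
_, summary_str = sess.run([agent.train_op, agent.summarize], feed_dict=feed)
writer.add_summary(summary_str, global_step=step)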
def clip_grad_local(grad):
    # Clip a single gradient tensor to the norm given on the command line
    return tf.clip_by_norm(grad, args.clip_value)
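# Hedged usage sketch for clip_grad_local; `optimizer` and `loss` are
# stand-ins. None gradients must be skipped, since tf.clip_by_norm cannot
# take None.
gvs = optimizer.compute_gradients(loss)
clipped_gvs = [(clip_grad_local(g), v) if g is not None else (g, v) for g, v in gvs]
train_op = optimizer.apply_gradients(clipped_gvs)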
def __init__(self, size_obs, size_act, net_struct=[100, 100, 100, 100], name='dbg'):
    self.tensorboardpath = 'tensorboards/' + name
    self.train_writer = tf.summary.FileWriter(self.tensorboardpath)
    self.ModelPath = 'Models/Imitation' + name
    self.mse_train = []
    self.mse_val = []
    self.last_epoch = 0
    size_inpt = 200

    self.obs = tf.placeholder(tf.float32, shape=(None, size_obs))
    self.ret = tf.placeholder(tf.float32, shape=(None))
    act_trn = self.obs
    act_tst = self.obs
    prev_layer_size = size_obs

    # Hidden layers
    self.l2_reg = 1e-8
    self.Q_lr = tf.placeholder(tf.float32, shape=(None))
    self.lr = tf.placeholder(tf.float32, shape=(None))

    # Toggle between a cascaded batch-norm/ReLU stack and plain linear layers
    if 1:
        for idx, l in enumerate(net_struct):
            act_trn, act_tst = ops.cascade_bn_relu_trn_tst(
                act_trn, prev_layer_size, l, name='layer' + str(idx), input_tst=act_tst)
            prev_layer_size += l
        w = tf.Variable(tf.random_uniform([prev_layer_size, size_act], minval=-1., maxval=1.),
                        name='net_output_w') * 1e-3
        b = tf.Variable(tf.random_uniform([size_act], minval=-1., maxval=1.),
                        name='net_output_bias') * 1e-3
    else:
        for idx, l in enumerate(net_struct):
            act_trn = ops.linear(act_trn, l, 'layer' + str(idx))
        w = tf.Variable(tf.random_uniform([l, size_act], minval=-1., maxval=1.),
                        name='net_output_w') * 1e-2
        b = tf.Variable(tf.random_uniform([size_act], minval=-1., maxval=1.),
                        name='net_output_bias') * 1e-2

    self.yhat = tf.reshape(tf.matmul(act_trn, w) + b, [-1, size_act])
    self.yhat_tst = tf.reshape(tf.matmul(act_tst, w) + b, [-1, size_act])
    self.obs_act = tf.concat((self.obs, self.yhat), 1)
    self.Q = Q(size_obs + size_act, tf.stop_gradient(self.obs_act))
    self.act = tf.placeholder(tf.float32, shape=(None))

    self.l2_loss = tf.reduce_mean(tf.square(self.yhat - self.act))
    self.adv_loss = tf.reduce_mean(tf.square(self.yhat_tst - self.act))
    self.advers = tf.gradients(self.l2_loss, self.obs)

    t_vars = tf.trainable_variables()
    net_vars = [var for var in t_vars if 'net_' in var.name]
    self.reg_loss = tf.reduce_sum([tf.reduce_sum(tf.square(var)) for var in net_vars]) * self.l2_reg

    optimizer = tf.train.AdamOptimizer(learning_rate=self.lr)
    gvs = optimizer.compute_gradients(self.l2_loss + self.reg_loss - self.Q.yhat * self.Q_lr + self.Q.l2_loss)
    # Mean of gradient means: a cheap training diagnostic, not a true norm
    self.grad_norm = tf.reduce_mean([tf.reduce_mean(grad) for grad, var in gvs if grad is not None])

    # Clip element-wise first, then by tensor norm
    clip_norm = 100
    clip_single = 1
    capped_gvs = [(tf.clip_by_value(grad, -1 * clip_single, clip_single), var)
                  for grad, var in gvs if grad is not None]
    capped_gvs = [(tf.clip_by_norm(grad, clip_norm), var) for grad, var in capped_gvs]
    self.optimizer = optimizer.apply_gradients(capped_gvs)
    # self.optimizer = tf.train.AdamOptimizer(self.lr).minimize(self.l2_loss)

    self.cur_Q_lr = 0
    self.session = tf.Session()
    self.session.run(tf.global_variables_initializer())
    self.Saver = tf.train.Saver()
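# Hedged sketch, lifted from the constructor above: the two-stage scheme
# (element-wise clip to [-clip_single, clip_single], then a per-tensor norm
# clip) as a reusable helper. The helper name is illustrative.
def clip_value_then_norm(gvs, clip_single=1.0, clip_norm=100.0):
    # Dropping (None, var) pairs is safe: apply_gradients only updates the
    # variables that actually received gradients.
    gvs = [(g, v) for g, v in gvs if g is not None]
    gvs = [(tf.clip_by_value(g, -clip_single, clip_single), v) for g, v in gvs]
    return [(tf.clip_by_norm(g, clip_norm), v) for g, v in gvs]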
def __init__(self, batch_size, vocab_size, sentence_size, memory_size, embedding_size,
             hops=3,
             max_grad_norm=40.0,
             nonlin=None,
             initializer=tf.random_normal_initializer(stddev=0.1),
             encoding=pos_enc,
             session=tf.Session(),
             name='MemN2N'):
    self._batch_size = batch_size
    self._vocab_size = vocab_size
    self._sentence_size = sentence_size
    self._memory_size = memory_size
    self._embedding_size = embedding_size
    self._hops = hops
    self._max_grad_norm = max_grad_norm
    self._nonlin = nonlin
    self._init = initializer
    self._name = name

    self.build_inputs()
    self.build_variables()

    self._opt = tf.train.GradientDescentOptimizer(learning_rate=self._lr)
    self._encoding = tf.constant(encoding(self._sentence_size, self._embedding_size), name="encoding")

    # (batch_size, vocab_size)
    logits = self.calc_output(self._stories, self._queries)
    self._logits = logits
    cross_entropy = tf.nn.softmax_cross_entropy_with_logits(
        logits=logits, labels=tf.cast(self._answers, tf.float32), name="cross_entropy")
    cross_entropy_sum = tf.reduce_sum(cross_entropy, name="cross_entropy_sum")
    loss_op = cross_entropy_sum

    # Clip each gradient by norm, add gradient noise, then zero the
    # gradient rows of the nil (padding) embeddings
    grads_and_vars = self._opt.compute_gradients(loss_op)
    grads_and_vars = [(tf.clip_by_norm(g, self._max_grad_norm), v) for g, v in grads_and_vars]
    grads_and_vars = [(add_noise(g), v) for g, v in grads_and_vars]
    nil_grads_and_vars = []
    for g, v in grads_and_vars:
        if v.name in self._nil_vars:
            nil_grads_and_vars.append((zero_slot(g), v))
        else:
            nil_grads_and_vars.append((g, v))
    train_op = self._opt.apply_gradients(nil_grads_and_vars, name="train_op")

    predict_op = tf.argmax(logits, 1, name="predict_op")

    self.loss_op = loss_op
    self.predict_op = predict_op
    self.train_op = train_op

    init_op = tf.global_variables_initializer()
    self._sess = session
    self._sess.run(init_op)
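# add_noise and zero_slot are not defined in this snippet. Hedged
# reconstructions below, modeled on the helpers commonly shipped with MemN2N
# implementations (add_gradient_noise / zero_nil_slot); the actual versions
# used here may differ.
def add_noise(t, stddev=1e-3, name=None):
    # Add centered Gaussian noise to a gradient tensor
    with tf.name_scope(name, "add_noise", [t]):
        t = tf.convert_to_tensor(t, name="t")
        noise = tf.random_normal(tf.shape(t), stddev=stddev)
        return tf.add(t, noise)

def zero_slot(t, name=None):
    # Zero the gradient row of the nil (padding) word at embedding index 0
    # so that embedding is never updated
    with tf.name_scope(name, "zero_slot", [t]):
        t = tf.convert_to_tensor(t, name="t")
        z = tf.zeros([1, tf.shape(t)[1]])
        return tf.concat([z, tf.slice(t, [1, 0], [-1, -1])], axis=0)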
def max_norm(weights):
    # threshold, axes, name and collection are free variables here; they are
    # expected to come from an enclosing scope (see the factory sketch below)
    clipped = tf.clip_by_norm(weights, clip_norm=threshold, axes=axes)
    clip_weights = tf.assign(weights, clipped, name=name)
    tf.add_to_collection(collection, clip_weights)
    return None  # max-norm clipping contributes no loss term
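# Hedged sketch of the factory that max_norm appears to be taken from (the
# usual max-norm regularizer pattern), plus how the collected clip ops are
# run during training. Names here are illustrative.
def max_norm_regularizer(threshold, axes=1, name="max_norm", collection="max_norm"):
    def max_norm(weights):
        clipped = tf.clip_by_norm(weights, clip_norm=threshold, axes=axes)
        clip_weights = tf.assign(weights, clipped, name=name)
        tf.add_to_collection(collection, clip_weights)
        return None  # no loss term; clipping is applied via the collection
    return max_norm

# During training, fetch the clip ops once and run them after each step:
#   clip_all_weights = tf.get_collection("max_norm")
#   sess.run(train_step, feed_dict=feed)
#   sess.run(clip_all_weights)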