def func(val):
    """Build the op that refreshes the significance mask of one weight.

    Only variables whose op name contains '/W' and contains neither 'conv1'
    nor 'fct' are handled; for every other variable nothing is built and
    None is returned.

    Mask entries are lowered where |val| is below the threshold and raised
    where |val| exceeds 1.2x the threshold (hysteresis). Both changes are
    gated by one random draw per index of the mask's last axis, and the
    result is clipped to [0, 1].

    Args:
        val: variable exposing `.op.name` and `.shape`.

    Returns:
        The op of the `tf.assign` that writes the new mask, or None when
        `val` is skipped.

    NOTE(review): `self`, `x`, `gamma`, `ratio`, `tf`, `K` and
    `get_global_step_var` are free names resolved outside this function.
    The scope name and the dynamic maximum are taken from `x`, not from
    `val` -- confirm that this is intended.
    NOTE(review): `eval` is applied to a configuration string; make sure the
    quantizer config never comes from an untrusted source.
    """
    var_name = val.op.name
    if '/W' not in var_name or 'conv1' in var_name or 'fct' in var_name:
        return None

    scope_name, _suffix = x.op.name.split('/W')
    with tf.variable_scope(scope_name, reuse=tf.AUTO_REUSE):
        w_opts = self.quantizer_config['W_opts']
        use_fixed_max = eval(w_opts['fix_max']) == True  # noqa: E712
        if use_fixed_max:
            stored_max = tf.get_variable(
                'maxW', shape=(), initializer=tf.ones_initializer,
                dtype=tf.float32)
            weight_max = tf.stop_gradient(stored_max)
            weight_max = weight_max * float(w_opts['max_scale'])
        else:
            weight_max = tf.stop_gradient(tf.reduce_max(tf.abs(x)))

        mask = tf.get_variable(
            'maskW', shape=val.shape, initializer=tf.zeros_initializer,
            dtype=tf.float32)
        update_prob = (1 + gamma * get_global_step_var()) ** -1

        # One uniform draw per index of the last mask axis decides which
        # slices may change in this iteration.
        draw = K.random_uniform(shape=(1, 1, 1, int(mask.shape[-1])))
        lower_gate = K.cast(draw < update_prob, dtype='float32')
        raise_gate = K.cast(draw < (update_prob * 0.1), dtype='float32')

        # Hysteresis band: entries are lowered below `low_thresh` and raised
        # above `high_thresh`.
        low_thresh = weight_max * ratio * 0.999
        high_thresh = 1.2 * low_thresh

        magnitude = K.abs(x=val)
        is_small = K.cast(magnitude < low_thresh, dtype='float32')
        updated = mask - is_small * lower_gate
        is_large = K.cast(magnitude > high_thresh, dtype='float32')
        updated = updated + is_large * raise_gate
        updated = K.clip(x=updated, min_value=0., max_value=1.)
        return tf.assign(mask, updated, use_locking=False).op
def _get_lr_variable(options): assert options.init_lr > 0, options.init_lr init_lr = options.init_lr lr_decay_method = options.lr_decay_method name = 'learning_rate' if lr_decay_method is None or lr_decay_method == 'human': lr = tf.get_variable(name, initializer=float(init_lr), trainable=False) global_step = get_global_step_var() assert options.steps_per_epoch, options.steps_per_epoch if lr_decay_method == 'cosine': assert options.max_epoch, options.max_epoch decay_steps = int(options.steps_per_epoch * options.max_epoch) + 1 lr = tf.train.cosine_decay(init_lr, global_step, decay_steps=decay_steps, name=name) elif lr_decay_method == 'exponential': assert options.lr_decay_every, options.lr_decay_every decay_steps = int(options.steps_per_epoch * options.lr_decay_every) lr = tf.train.exponential_decay(init_lr, global_step, decay_steps=decay_steps, decay_rate=options.lr_decay, staircase=True, name=name) tf.summary.scalar(name + '-summary', lr) return lr
def _setup_graph(self):
    """Create the pruning object and its conditional mask-update op.

    The built-in defaults below are overridden by same-named entries of
    `self.param_dict` (other keys are ignored), serialised as a
    comma-separated "key=value" string and parsed into pruning hparams.

    Sets `self.p` (the `pruning.Pruning` instance) and
    `self.mask_update_op`.
    """
    # NOTE(review): 'model_pruining' looks like a misspelling, but it is a
    # runtime name and is therefore left untouched.
    hparams = {
        'name': 'model_pruining',
        'begin_pruning_step': 0,
        'end_pruning_step': 34400,
        'target_sparsity': 0.31,
        'pruning_frequency': 344,
        'sparsity_function_begin_step': 0,
        'sparsity_function_end_step': 34400,
        'sparsity_function_exponent': 2,
    }
    overrides = {
        key: value
        for key, value in self.param_dict.items()
        if key in hparams
    }
    hparams.update(overrides)

    hparam_spec = ",".join(
        '{}={}'.format(key, value) for key, value in hparams.items())
    parsed_hparams = pruning.get_pruning_hparams().parse(hparam_spec)

    self.p = pruning.Pruning(parsed_hparams, global_step=get_global_step_var())
    self.p.add_pruning_summaries()
    self.mask_update_op = self.p.conditional_mask_update_op()
def _addMovingSummary(self, v, *args, **kwargs):
    """Maintain exponential moving averages of scalar tensors and summarise them.

    Args:
        v (tf.Tensor or list): tensor or list of tensors to summary.
            Must have scalar type. A list passed here is not modified.
        args: tensors to summary (support positional arguments)
        decay (float): the decay rate. Defaults to 0.95.
        collection (str): the name of the collection to add EMA-maintaining
            ops. Only consumed when the current NN context does not supply
            its own summary collection; in that other case the key is left
            in `kwargs` and triggers the "Unknown arguments" assertion.

    Returns:
        None when the current NN context has `summary_collection` set to
        False; otherwise the tuple `(averager, avg_maintain_op)`.
    """
    from tensorpack.tfutils.summary import MOVING_SUMMARY_OPS_KEY
    from tensorpack.tfutils.common import get_global_step_var
    import re
    import tensorflow as tf

    decay = kwargs.pop('decay', 0.95)
    collection = MOVING_SUMMARY_OPS_KEY
    summary_collection = None
    nn_context = _current_nn_context
    if nn_context and nn_context.summary_collection is False:
        return
    if nn_context and nn_context.summary_collection:
        summary_collection = [nn_context.summary_collection]
        # NOTE(review): this reads `_summary_collection` while the lines
        # above read `summary_collection` -- confirm both attributes exist.
        collection = nn_context._summary_collection + '-ema_op'
    elif 'collection' in kwargs:
        collection = kwargs.pop('collection')
    assert len(kwargs) == 0, "Unknown arguments: " + str(kwargs)

    # Work on a copy so a list supplied by the caller is never extended
    # in place.
    tensors = list(v) if isinstance(v, list) else [v]
    tensors.extend(args)
    for x in tensors:
        assert (isinstance(x, tf.Tensor) or isinstance(x, tf.Variable)), x
        assert x.get_shape().ndims == 0, x.get_shape()

    # TODO will produce tower0/xxx?
    # TODO use zero_debias
    with tf.name_scope(None):
        averager = tf.train.ExponentialMovingAverage(
            decay, num_updates=get_global_step_var(), name='EMA')
        avg_maintain_op = averager.apply(tensors)
        for c in tensors:
            # TODO do this in the EMA callback?
            # Strip the tower prefix so summaries share one name.
            name = re.sub('tower[pe0-9]+/', '', c.op.name)
            tf.summary.scalar(name + '-summary', averager.average(c),
                              collections=summary_collection)

    tf.add_to_collection(collection, avg_maintain_op)
    return averager, avg_maintain_op
def _apply_drop_path(
        x, drop_path_keep_prob, curr_depth, total_depth, max_train_steps):
    """Apply drop-path to `x` with a depth- and step-scheduled keep probability.

    The drop probability `1 - drop_path_keep_prob` is first scaled by
    `(curr_depth + 1) / total_depth` and then by the training progress
    `min(1, (global_step + 1) / max_train_steps)`.

    Args:
        x: tensor forwarded to `_drop_path`.
        drop_path_keep_prob: base keep probability.
        curr_depth: index of the current layer.
        total_depth: divisor for the depth ratio.
        max_train_steps: step count at which the step ratio reaches 1.

    Returns:
        The result of `_drop_path(x, scheduled_keep_prob)`.
    """
    depth_fraction = float(curr_depth + 1) / total_depth
    depth_keep_prob = 1.0 - depth_fraction * (1.0 - drop_path_keep_prob)

    step_now = tf.to_float(get_global_step_var() + 1)
    progress = tf.minimum(1.0, step_now / tf.to_float(max_train_steps))
    scheduled_keep_prob = 1.0 - progress * (1.0 - depth_keep_prob)

    return _drop_path(x, scheduled_keep_prob)
def update_init_state(self, verbose=False):
    """Group the per-cell state update ops into one op named 'set_init_state'.

    For each of the `self.num_lstms` cells, the update ops come from
    `get_update_ops(self.state[k], self.last_state[k])`.

    Args:
        verbose: when True, also append a `tf.Print` op that runs after the
            updates and prints the global step and the means of
            `self.state[0]` and `self.last_state[0]`.

    Returns:
        The grouped op.
    """
    ops = [
        op
        for idx in range(self.num_lstms)
        for op in self.basic_cells[idx].get_update_ops(
            self.state[idx], self.last_state[idx])
    ]

    if verbose:
        with tf.control_dependencies(ops):
            probes = [
                get_global_step_var(),
                tf.reduce_mean(self.state[0]),
                tf.reduce_mean(self.last_state[0]),
            ]
            ops.append(tf.Print(probes[-1], probes))

    return tf.group(*ops, name='set_init_state')
def update_state(self, dependencies=[], verbose=False, name=None):
    """Update op for shifting states.

    Args:
        dependencies: ops that must run before the state updates. The
            default list is never mutated here.
        verbose: when True, also print the global step via `tf.Print`.
        name: name of the grouped op; 'update_state' when None.

    Returns:
        One grouped op covering every cell's update ops.
    """
    with tf.control_dependencies(dependencies):
        ops = [
            op
            for idx in range(self.num_lstms)
            for op in self.basic_cells[idx].get_update_ops(
                self.state[idx], self.last_state[idx])
        ]
        if verbose:
            step = get_global_step_var()
            ops.append(tf.Print(step, [step]))
        group_name = 'update_state' if name is None else name
        return tf.group(*ops, name=group_name)
def _setup_graph(self) -> None:
    """Prepare metric fetches, a checkpoint saver and a global-step setter.

    Sets `self._fetch`, `self.saver`, `self.gs_val` (an int64 scalar
    placeholder) and `self.gs_set_op` (assigns the placeholder value to the
    global step). If an evaluator is present, its graph is set up first.
    """
    if self.evaluator:
        self.evaluator.set_up_graph(self.trainer)

    # Fetch the requested metrics, along with the global step for debugging.
    metric_tensors = {}
    for metric_name in self.metric_names:
        metric_tensors[metric_name] = self.get_tensor(metric_name)
    step_tensor = tf.train.get_or_create_global_step()
    self._fetch = tf.train.SessionRunArgs(fetches=(metric_tensors, step_tensor))

    # Set up model saving logic (taken from tp.callbacks.ModelSaver).
    self.saver = tf.train.Saver(
        max_to_keep=None,
        write_version=tf.train.SaverDef.V2,
        save_relative_paths=True,
    )
    tf.add_to_collection(tf.GraphKeys.SAVERS, self.saver)

    # Created under the root name scope.
    with tf.name_scope(None):
        self.gs_val = tf.placeholder(tf.int64, shape=())
        assign_step = tf.assign(
            get_global_step_var(), self.gs_val, name="DET_SET_GLOBAL_STEP")
        self.gs_set_op = assign_step.op
def build_graph(self, *inputs):
    """Build the network, its auxiliary predictions and the total cost.

    Args:
        inputs: tensors handed to `self._preprocess_data`, which returns the
            initial layer and the label.

    Returns:
        `self.cost`: the sum of the weighted prediction losses ('sum_losses'),
        the weight-decay cost ('wd_cost') and, when any were collected, the
        hallucination costs ('hallu_total_cost').

    NOTE(review): `wd_w` is bound only when `regularize_coef` is 'const' or
    'decay'; `master_hid_to_fs_params` and `hallu_record` are bound only when
    'master' is among `self.net_info.cell_names`. Confirm callers guarantee both.
    """
    # Dynamic weighting for multiple predictions
    if self.options.ls_method == ADALOSS_LS_METHOD:
        dynamic_weights = tf.get_variable(
            DYNAMIC_WEIGHTS_NAME, (self.n_aux_preds,), tf.float32,
            trainable=False,
            initializer=tf.constant_initializer([1.0]*self.n_aux_preds))
        for i in range(self.n_aux_preds):
            weight_i = tf.identity(
                dynamic_weights[i], 'weight_{:02d}'.format(i))
            add_moving_summary(weight_i)

    with argscope(
            [
                Conv2D, Deconv2D, GroupedConv2D, AvgPooling,
                MaxPooling, BatchNorm, GlobalAvgPooling,
                ResizeImages, SeparableConv2D
            ],
            data_format=self.data_format
        ), \
        argscope(
            [Conv2D, Deconv2D, GroupedConv2D, SeparableConv2D],
            activation=tf.identity,
            use_bias=self.options.use_bias
        ), \
        argscope(
            [BatchNorm],
            momentum=float(self.options.batch_norm_decay),
            epsilon=float(self.options.batch_norm_epsilon)
        ), \
        argscope(
            [candidate_gated_layer],
            eps=self.options.candidate_gate_eps
        ):

        # regularization initialization
        if self.options.regularize_coef == 'const':
            wd_w = self.options.regularize_const
        elif self.options.regularize_coef == 'decay':
            wd_w = tf.train.exponential_decay(0.0002, get_global_step_var(),
                                              480000, 0.2, True)

        # Network-level objects / information
        n_inputs = self.master.num_inputs()
        drop_path_func = DropPath(
            drop_path_keep_prob=self.options.drop_path_keep_prob,
            max_train_steps=self.options.max_train_steps,
            total_depth=self.n_layers - n_inputs)
        # Shared list of hallucination costs, passed to the cells and to
        # construct_layer below.
        l_hallu_costs = []
        # cell dictionary
        self.op_to_cell = dict()
        for cname in self.net_info.cell_names:
            hid_to_fs_params = _init_feature_select(
                self.net_info[cname], cname, self.options.feat_sel_lambda)
            if cname == 'master':
                # since master has additional duties like down_sampling,
                # aux prediction, accumulating hallu stats, etc,
                # master is not built from cell
                master_hid_to_fs_params = hid_to_fs_params
                hallu_record = _init_hallu_record(self.compute_hallu_stats)
                continue
            self.op_to_cell[cname] = PetridishBaseCell(
                self.net_info[cname],
                self.data_format,
                self.compute_hallu_stats,
                drop_path_func=drop_path_func,
                hid_to_fs_params=hid_to_fs_params,
                l_hallu_costs=l_hallu_costs)

        l_layers = [None] * self.n_layers
        layer_dict = dict()
        out_filters = self.out_filters

        # on-GPU(device) preprocessing for mean/var, casting, embedding, init conv
        layer, label = self._preprocess_data(inputs)
        # Every input layer id maps to the same preprocessed tensor.
        for layer_idx in range(n_inputs):
            info = self.master[layer_idx]
            layer_dict[info.id] = layer

        for layer_idx in range(n_inputs, self.n_layers):
            info = self.master[layer_idx]
            layer_id_str = "layer{:03d}".format(info.id)
            strides = 1
            if info.down_sampling:
                out_filters *= 2
                strides = 2

            # preprocess all inputs to match the most recent layer
            # in h/w and out_filters in ch_dim
            with tf.variable_scope('pre_'+layer_id_str):
                # Remember the unreduced inputs so they can be restored below.
                orig_dict = dict()
                for input_id in info.inputs:
                    in_l = layer_dict[input_id]
                    orig_dict[input_id] = in_l
                    layer_dict[input_id] = _reduce_prev_layer(
                        in_l, input_id, layer, out_filters,
                        self.data_format, hw_only=False)

            layer = construct_layer(
                layer_id_str, layer_dict, info, out_filters, strides,
                self.data_format, info.stop_gradient,
                op_to_cell=self.op_to_cell,
                drop_path_func=drop_path_func,
                non_input_layer_idx=layer_idx - n_inputs,
                hid_to_fs_params=master_hid_to_fs_params,
                l_hallu_costs=l_hallu_costs
            )

            # store info for future compute
            layer_dict[info.id] = layer
            l_layers[layer_idx] = layer
            hallu_record = _update_hallu_record(
                self.compute_hallu_stats, hallu_record, layer_idx,
                self.master, layer_dict)

            if self.options.use_local_reduction:
                # reset the reduction layers in dict. So each layer
                # uses its own reduction
                for input_id in orig_dict:
                    layer_dict[input_id] = orig_dict[input_id]
        # end for layer wise feature construction.

        # build aux predictions
        total_cost = 0.0
        wd_cost = 0.0
        # Counts the layers with a positive aux weight; used for op names and
        # as index into dynamic_weights.
        anytime_idx = -1
        for layer_idx, layer in enumerate(l_layers):
            # aux prediction
            info = self.master[layer_idx]
            cost_weight = info.aux_weight

            if cost_weight > 0:
                anytime_idx += 1

                scope_name = scope_prediction(info.id)
                cost, variables = feature_to_prediction_and_loss(
                    scope_name, layer, label,
                    self.num_classes, self.prediction_feature,
                    ch_dim=self.ch_dim,
                    label_smoothing=self.options.label_smoothing,
                    dense_dropout_keep_prob=self.options.dense_dropout_keep_prob,
                    is_last=(layer_idx + 1 == len(l_layers)))

                # record the cost for the use of online learners.
                cost_i = tf.identity(cost, name='anytime_cost_{:02d}'.format(anytime_idx))

                # decide whether to use static or dynamic weights
                if self.options.ls_method == ADALOSS_LS_METHOD:
                    cost_weight = dynamic_weights[anytime_idx]
                total_cost += cost_weight * cost_i

                # regularize variable in linear predictors
                # (have to do this separately here because
                # we need unregularized losses for cost_weights)
                for var in variables:
                    wd_cost += cost_weight * wd_w * tf.nn.l2_loss(var)
            # end if aux_weight > 0
        # end for each layer

        # regularization, cost
        if self.params_to_regularize is not None:
            wd_cost += wd_w * regularize_cost(self.params_to_regularize, tf.nn.l2_loss)
        wd_cost = tf.identity(wd_cost, name='wd_cost')
        total_cost = tf.identity(total_cost, name='sum_losses')
        add_moving_summary(total_cost, wd_cost)
        if l_hallu_costs:
            hallu_total_cost = tf.add_n(l_hallu_costs, name='hallu_total_cost')
            add_moving_summary(hallu_total_cost)
            self.cost = tf.add_n([total_cost, wd_cost, hallu_total_cost], name='cost')
        else:
            self.cost = tf.add_n([total_cost, wd_cost], name='cost')

        # hallu stats
        for cname in self.net_info.cell_names:
            if cname == 'master':
                _hallu_stats_graph(
                    self.compute_hallu_stats, hallu_record,
                    self.cost, scope=cname)
                continue
            # NOTE(review): `.get` may return None, but `cell.n_calls` below is
            # read unconditionally -- confirm every non-master cell is present.
            cell = self.op_to_cell.get(cname, None)
            cell_hallu_record = getattr(cell, 'hallu_record', None)
            _hallu_stats_graph_merged(
                self.compute_hallu_stats, cell_hallu_record,
                self.cost, scope=cname, n_calls=cell.n_calls,
                layer_info_list=cell.layer_info_list)
        return self.cost
def _build_graph(self, inputs):
    """Build generator, discriminator and the WGAN losses with gradient penalty.

    Args:
        inputs: indexable; `inputs[0]` is the real image batch and, when
            TRANSITION is set, `inputs[1]` is the batch that is upsampled by
            a factor of 2 and blended with it. Both are rescaled with
            `/ 128.0 - 1`.

    Sets `self.g_loss` and `self.d_loss` (the latter including the gradient
    penalty and the drift term), calls `self.collect_variables()` and logs
    the number of trainable parameters in the 'gen' and 'discrim' scopes.

    NOTE(review): the extent of the two `tf.variable_scope` blocks was
    reconstructed; confirm which ops belong inside 'gen' and 'discrim'.
    """
    # keep track of statistics to interpolate between different checkpoints
    # from 0 (only old checkpoint) to 1 (only new network)
    glbstep = tf.identity(get_global_step_var(), name="glob_step")
    seen_images = tf.cast(get_global_step_var() * BATCH_SIZE, tf.int64, name='seen_images')

    if TRANSITION:
        alpha = tf.divide(tf.cast(seen_images, tf.float32), tf.cast(NUM_IMAGES, tf.float32), name='alpha')
        transition_phase = tf.get_variable('transistion_phase', initializer=1., trainable=False, dtype=tf.float32)
    else:
        alpha = tf.identity(0, name='alpha')
        transition_phase = tf.get_variable('transistion_phase', initializer=0., trainable=False, dtype=tf.float32)

    add_moving_summary(alpha, seen_images, tf.identity(transition_phase, name="transistion"), glbstep)

    if TRANSITION:
        real_img, real_prev = inputs[0] / 128.0 - 1, Upsample(
            "upsample_realprev", inputs[1] / 128.0 - 1, factor=2)
        real_img = combine_img(real_img, real_prev, alpha)
    else:
        real_prev = None
        real_img, real_prev = inputs[0] / 128.0 - 1, None

    # noise which the generator is starting from
    z = tf.random_uniform([BATCH_SIZE, NOISE_DIM], -1, 1, name='z_train')
    # The random noise is only the default; 'z' can be fed explicitly.
    z = tf.placeholder_with_default(z, [None, NOISE_DIM], name='z')

    # GENERATOR
    # ---------------------------------------------------------------------
    with tf.variable_scope('gen'):
        fake_img = self.generator(z, alpha=alpha)
    visualize_images('real_fake', real_img, fake_img)

    # Map the generator output back to uint8 pixel values in [0, 255].
    fake_output = (fake_img + 1.) * 128.
    fake_output = tf.cast(tf.clip_by_value(fake_output, 0, 255), tf.uint8, name='viz')
    tf.identity(fake_output, name='fake_img')

    # DISCRIMINATOR
    # ---------------------------------------------------------------------
    with tf.variable_scope('discrim'):
        # Per-sample random mix of real and fake images for the gradient penalty.
        WGAN_alpha = tf.random_uniform(shape=[BATCH_SIZE, 1, 1, 1], minval=0., maxval=1., name='alpha')
        interp_img = real_img + WGAN_alpha * (fake_img - real_img)
        visualize_images('real_fake_interp', real_img, fake_img, interp_img)

        real_score = self.discriminator(real_img, alpha=alpha)
        fake_score = self.discriminator(fake_img, alpha=alpha)
        interp_score = self.discriminator(interp_img, alpha=alpha)

    mean_real_score = tf.reduce_mean(real_score, name='mean_real_score')
    mean_fake_score = tf.reduce_mean(fake_score, name='mean_fake_score')
    mean_interp_score = tf.reduce_mean(interp_score, name='mean_interp_score')
    add_moving_summary(mean_real_score, mean_fake_score, mean_interp_score)

    # the Wasserstein-GAN losses
    self.d_loss = tf.reduce_mean(fake_score - real_score, name='d_loss')
    self.g_loss = tf.negative(tf.reduce_mean(fake_score), name='g_loss')
    loss_diff = tf.subtract(self.g_loss, self.d_loss, name="loss-diff-g-d")
    add_moving_summary(self.d_loss, self.g_loss, loss_diff)

    # the gradient penalty loss
    def wasserstein_grad_penalty(score, input, name=None):
        """Return (rms of the gradient norms, mean squared deviation of the norms from 1).

        The gradient of `score` w.r.t. `input` is reduced to one L2 norm per
        sample over axes [1, 2, 3].
        """
        with tf.name_scope(name):
            gradients = tf.gradients(score, [input])[0]
            gradients = tf.sqrt(
                tf.reduce_sum(tf.square(gradients), [1, 2, 3]))
            gradients_rms = symbolic_functions.rms(gradients, 'gradient_rms')
            gradient_penalty = tf.reduce_mean(tf.square(gradients - 1), name='gradient_penalty')
            return gradients_rms, gradient_penalty

    gradients_rms, gradient_penalty = wasserstein_grad_penalty(
        interp_score, interp_img)
    add_moving_summary(gradient_penalty, gradients_rms)

    # drift-loss
    drift_loss = tf.reduce_mean(tf.square(real_score), name='drift_loss')

    # Total discriminator loss: WGAN loss + 10 * gradient penalty + EPS_DRIFT * drift.
    self.d_loss = tf.add_n(
        [self.d_loss, 10 * gradient_penalty, EPS_DRIFT * drift_loss],
        name='total_d_loss')
    add_moving_summary(self.d_loss, drift_loss)

    self.collect_variables()

    def count_params_in_scope(scope):
        """Sum the element counts of all trainable variables in `scope`."""
        vs = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope)
        return np.sum([int(np.prod(v.shape)) for v in vs])

    logger.info(colored("Number of Parameters:", 'cyan'))
    logger.info("generator #params: {:,}".format(
        count_params_in_scope('gen')))
    logger.info("discriminator #params: {:,}".format(
        count_params_in_scope('discrim')))
def _build_ad_nn(self, tensor_io):
    """Build the recurrent actor-critic network and, when training, its losses.

    Args:
        tensor_io: a `TensorIO` supplying the input tensors 'state',
            'agentIdent', 'sequenceLength' and 'resetRNN' (plus 'action',
            'advantage' and 'futurereward' when training) and receiving the
            output tensors when not training.

    Returns:
        None when not training (after registering policy, value, mus and
        sigmas as outputs); otherwise the tuple `(loss_policy, loss_value)`.

    NOTE(review): the extent of the 'critic' and 'actor' variable scopes was
    reconstructed; confirm which statements belong inside each.
    """
    from drlutils.dataflow.tensor_io import TensorIO
    assert (isinstance(tensor_io, TensorIO))
    from drlutils.model.base import get_current_nn_context
    from tensorpack.tfutils.common import get_global_step_var
    # NOTE(review): global_step is not used by any active code below.
    global_step = get_global_step_var()
    nnc = get_current_nn_context()
    is_training = nnc.is_training
    i_state = tensor_io.getInputTensor('state')
    i_agentIdent = tensor_io.getInputTensor('agentIdent')
    i_sequenceLength = tensor_io.getInputTensor('sequenceLength')
    i_resetRNN = tensor_io.getInputTensor('resetRNN')
    l = i_state
    with tf.variable_scope('critic', reuse=nnc.reuse) as vs:
        def _get_cell():
            """Return one 256-unit BasicLSTMCell."""
            cell = tf.nn.rnn_cell.BasicLSTMCell(256)
            return cell

        cell = tf.nn.rnn_cell.MultiRNNCell([_get_cell() for _ in range(1)])
        rnn_outputs = self._buildRNN(
            l,
            cell,
            tensor_io.batchSize,
            i_agentIdent=i_agentIdent,
            i_sequenceLength=i_sequenceLength,
            i_resetRNN=i_resetRNN,
        )
        # Collapse all leading axes; only the last (feature) axis is kept.
        rnn_outputs = tf.reshape(
            rnn_outputs, [-1, rnn_outputs.get_shape().as_list()[-1]])
        l = rnn_outputs
        from ad_cur.autodrive.model.selu import fc_selu
        for lidx in range(2):
            l = fc_selu(
                l,
                200,
                keep_prob=1.,  # we train from sensor readings only, so key information must not be dropped
                is_training=is_training,
                name='fc-{}'.format(lidx))
        value = tf.layers.dense(l, 1, name='fc-value')
        value = tf.squeeze(value, [1], name="value")
        # Collected only once per instance.
        if not hasattr(self, '_weights_critic'):
            self._weights_critic = tf.get_collection(
                tf.GraphKeys.TRAINABLE_VARIABLES, scope=vs.name)

    with tf.variable_scope('actor', reuse=nnc.reuse) as vs:
        # The actor reads the critic features without back-propagating into them.
        l = tf.stop_gradient(l)
        l = tf.layers.dense(l, 128, activation=tf.nn.relu6, name='fc-actor')
        # tanh output scaled by 0.5, so the steering mean lies in (-0.5, 0.5).
        mu_steering = 0.5 * tf.layers.dense(
            l, 1, activation=tf.nn.tanh, name='fc-mu-steering')
        mu_accel = tf.layers.dense(l, 1, activation=tf.nn.tanh, name='fc-mu-accel')
        mus = tf.concat([mu_steering, mu_accel], axis=-1)

        # NOTE(review): saturating_sigmoid is defined but not called below.
        def saturating_sigmoid(x):
            """Saturating sigmoid: 1.2 * sigmoid(x) - 0.1 cut to [0, 1]."""
            with tf.name_scope("saturating_sigmoid", [x]):
                y = tf.sigmoid(x)
                return tf.minimum(1.0, tf.maximum(0.0, 1.2 * y - 0.1))

        sigma_steering_ = 0.1 * tf.layers.dense(
            l, 1, activation=tf.nn.sigmoid, name='fc-sigma-steering')
        sigma_accel_ = 0.25 * tf.layers.dense(
            l, 1, activation=tf.nn.sigmoid, name='fc-sigma-accel')

        if not nnc.is_evaluating:
            sigma_beta_steering = tf.get_default_graph(
            ).get_tensor_by_name('actor/sigma_beta_steering:0')
            sigma_beta_accel = tf.get_default_graph().get_tensor_by_name(
                'actor/sigma_beta_accel:0')
            # NOTE(review): this overwrites the tensor fetched just above with
            # a constant -- confirm that is intended.
            sigma_beta_steering = tf.constant(1e-4)
        else:
            sigma_beta_steering = tf.constant(1e-4)
            sigma_beta_accel = tf.constant(1e-4)
        sigma_steering = (sigma_steering_ + sigma_beta_steering)
        sigma_accel = (sigma_accel_ + sigma_beta_accel)

        sigmas = tf.concat([sigma_steering, sigma_accel], axis=-1)
        # Author's note on sigma_beta (translated): without sigma_beta,
        # convergence is slow and unstable; presumed reasons:
        # 1. exploring as widely as possible early in training keeps the
        #    network out of local optima;
        # 2. a too-small sigma early on makes the normal distribution's
        #    log_prob too large, which causes oversized gradient updates that
        #    deform the network from the start and are hard to recover from.
        from tensorflow.contrib.distributions import Normal
        dists = Normal(mus, sigmas + 0.01)
        policy = tf.squeeze(dists.sample([1]), [0])
        # Clip the sample to within two sigmas of the mean.
        policy = tf.clip_by_value(policy, mus - 2 * sigmas, mus + 2 * sigmas)
        if is_training:
            self._addMovingSummary(
                tf.reduce_mean(mu_steering, name='mu/steering/mean'),
                tf.reduce_mean(mu_accel, name='mu/accel/mean'),
                tf.reduce_mean(sigma_steering, name='sigma/steering/mean'),
                tf.reduce_max(sigma_steering, name='sigma/steering/max'),
                tf.reduce_mean(sigma_accel, name='sigma/accel/mean'),
                tf.reduce_max(sigma_accel, name='sigma/accel/max'),
            )
        if not hasattr(self, '_weights_actor'):
            self._weights_actor = tf.get_collection(
                tf.GraphKeys.TRAINABLE_VARIABLES, scope=vs.name)

    if not is_training:
        tensor_io.setOutputTensors(policy, value, mus, sigmas)
        return

    i_actions = tensor_io.getInputTensor("action")
    # Merge the two leading axes of each training input into one.
    i_actions = tf.reshape(i_actions, [-1] + i_actions.get_shape().as_list()[2:])
    log_probs = dists.log_prob(i_actions)
    i_advantage = tensor_io.getInputTensor("advantage")
    i_advantage = tf.reshape(i_advantage, [-1] + i_advantage.get_shape().as_list()[2:])
    exp_v = log_probs * tf.expand_dims(i_advantage, -1)
    entropy = dists.entropy()
    entropy_beta = tf.get_variable(
        'entropy_beta',
        shape=[],
        initializer=tf.constant_initializer(0.01),
        trainable=False)
    # Objective per action dimension: advantage-weighted log-prob plus
    # entropy bonus; the policy loss is its negated sum over dimensions.
    exp_v = entropy_beta * entropy + exp_v
    loss_policy = tf.reduce_mean(-tf.reduce_sum(exp_v, axis=-1), name='loss/policy')

    i_futurereward = tensor_io.getInputTensor("futurereward")
    i_futurereward = tf.reshape(i_futurereward, [-1] + i_futurereward.get_shape().as_list()[2:])
    loss_value = tf.reduce_mean(0.5 * tf.square(value - i_futurereward))

    # Reported in the summaries only; not added to either returned loss.
    loss_entropy = tf.reduce_mean(tf.reduce_sum(entropy, axis=-1), name='xentropy_loss')

    from tensorflow.contrib.layers.python.layers.regularizers import apply_regularization, l2_regularizer
    # L2 regularisation over the critic weights is added to the value loss.
    loss_l2_regularizer = apply_regularization(l2_regularizer(1e-4), self._weights_critic)
    loss_l2_regularizer = tf.identity(loss_l2_regularizer, 'loss/l2reg')
    loss_value += loss_l2_regularizer
    loss_value = tf.identity(loss_value, name='loss/value')

    self._addParamSummary([('.*', ['rms', 'absmax'])])
    pred_reward = tf.reduce_mean(value, name='predict_reward')
    import tensorpack.tfutils.symbolic_functions as symbf
    advantage = symbf.rms(i_advantage, name='rms_advantage')
    self._addMovingSummary(
        loss_policy,
        loss_value,
        loss_entropy,
        pred_reward,
        advantage,
        loss_l2_regularizer,
        tf.reduce_mean(policy[:, 0], name='actor/steering/mean'),
        tf.reduce_mean(policy[:, 1], name='actor/accel/mean'),
    )
    return loss_policy, loss_value
if __name__ == '__main__': parser = argparse.ArgumentParser() parser.add_argument('--logdir', help='logdir', default='') args = parser.parse_args() # P_py = np.load('/jasper/models/gp140/P_py.npy') Ppy = np.load('/jasper/models/BetaGal/betagal1.5_projections.npy') Ppy = Ppy[0] # leave only first symmetric unit vlen, nviews = Ppy.shape[-1], Ppy.shape[0] os.environ['CUDA_VISIBLE_DEVICES'] = get_visible_device_list(3) global_step = get_global_step_var() # set logger directory for checkpoints, etc logger.set_logger_dir(args.logdir, action='k') steps_per_epoch = cfg.EPOCH_STEPS model = Model(vlen, nviews) # config.gpu_options.allow_growth = True traincfg = TrainConfig( model=model, data=QueueInput(ProjDataFlow(Ppy)), callbacks=[ PeriodicTrigger(ModelSaver(), every_k_epochs=5), PeriodicTrigger(VolumeSaver(model), every_k_epochs=5), # prevent learning in the first epoch # MemInitHyperParamSetter('learning_rate_mask',(0,1)),
def _get_NN_prediction(self, state):
    """Build the critic and actor heads on top of `state`.

    Args:
        state: input tensor fed to the first `fc_selu` layer.

    Returns:
        Tuple `(actions, value, dists)`: one sample drawn from `dists`, the
        output of the 'fc-value' dense layer, and the Normal distribution
        built from the predicted means and sigmas.

    Also records `self._weights_critic` and `self._weights_actor` the first
    time they are missing on the instance.

    NOTE(review): the extent of the 'critic' and 'actor' variable scopes was
    reconstructed; confirm which statements belong inside each.
    """
    # NOTE(review): symbolic_functions is imported but not used by active code.
    from tensorpack.tfutils import symbolic_functions
    ctx = get_current_tower_context()
    is_training = ctx.is_training
    l = state
    with tf.variable_scope('critic') as vs:
        from autodrive.model.selu import fc_selu
        for lidx in range(8):
            l = fc_selu(l, 200,
                        keep_prob=1.,  # we train from sensor readings only, so key information must not be dropped
                        is_training=is_training, name='fc-{}'.format(lidx))
        value = tf.layers.dense(l, 1, name='fc-value',\
                                kernel_initializer=tf.truncated_normal_initializer(stddev=0.1))
        if not hasattr(self, '_weights_critic'):
            self._weights_critic = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=vs.name)

    with tf.variable_scope('actor') as vs:
        # The actor reads the critic features without back-propagating into them.
        l = tf.stop_gradient(l)
        # tanh output scaled by 0.5, so the steering mean lies in (-0.5, 0.5).
        mu_steering = 0.5 * tf.layers.dense(l, 1, activation=tf.nn.tanh, name='fc-mu-steering',\
                                            kernel_initializer=tf.truncated_normal_initializer(stddev=0.01))
        mu_accel = tf.layers.dense(l, 1, activation=tf.nn.tanh, name='fc-mu-accel',\
                                   kernel_initializer=tf.truncated_normal_initializer(stddev=0.01))
        mus = tf.concat([mu_steering, mu_accel], axis=-1)
        sigma_steering_ = 0.5 * tf.layers.dense(l, 1, activation=tf.nn.sigmoid, name='fc-sigma-steering',\
                                                kernel_initializer=tf.truncated_normal_initializer(stddev=0.01))
        sigma_accel_ = 1. * tf.layers.dense(l, 1, activation=tf.nn.sigmoid, name='fc-sigma-accel',\
                                            kernel_initializer=tf.truncated_normal_initializer(stddev=0.01))
        from tensorpack.tfutils.common import get_global_step_var
        # Additive sigma offsets that decay with the global step
        # (multiplied by 0.5 per 1000 resp. 5000 steps).
        sigma_beta_steering_exp = tf.train.exponential_decay(0.001, get_global_step_var(), 1000, 0.5, name='sigma/beta/steering/exp')
        sigma_beta_accel_exp = tf.train.exponential_decay(0.5, get_global_step_var(), 5000, 0.5, name='sigma/beta/accel/exp')
        sigma_steering = (sigma_steering_ + sigma_beta_steering_exp)
        sigma_accel = (sigma_accel_ + sigma_beta_accel_exp)

        sigmas = tf.concat([sigma_steering, sigma_accel], axis=-1)
        # Author's note on sigma_beta (translated): without sigma_beta,
        # convergence is slow and unstable; presumed reasons:
        # 1. exploring as widely as possible early in training keeps the
        #    network out of local optima;
        # 2. a too-small sigma early on makes the normal distribution's
        #    log_prob too large, which causes oversized gradient updates that
        #    deform the network from the start and are hard to recover from.
        from tensorflow.contrib.distributions import Normal
        # Small constant keeps the scale strictly positive.
        dists = Normal(mus, sigmas+1e-3)
        actions = tf.squeeze(dists.sample([1]), [0])
        if is_training:
            summary.add_moving_summary(tf.reduce_mean(mu_steering, name='mu/steering/mean'),
                                       tf.reduce_mean(mu_accel, name='mu/accel/mean'),
                                       tf.reduce_mean(sigma_steering, name='sigma/steering/mean'),
                                       tf.reduce_max(sigma_steering, name='sigma/steering/max'),
                                       tf.reduce_mean(sigma_accel, name='sigma/accel/mean'),
                                       tf.reduce_max(sigma_accel, name='sigma/accel/max'),
                                       sigma_beta_accel_exp,
                                       sigma_beta_steering_exp,
                                       )
        if not hasattr(self, '_weights_actor'):
            self._weights_actor = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=vs.name)
    return actions, value, dists
def _build_graph(self, inputs):
    """Build the training network, two copies of it, and the clipped policy loss.

    Args:
        inputs: sequence unpacked as `(state, action, futurereward, advantage)`.

    Sets `self.value`, `self.policy`, `self._sync_op` (copies the training
    weights plus truncated-normal noise into the 'Pred' copy),
    `self._td_sync_op` (copies them unchanged into the 'pre' copy) and, when
    training, `self._cost = [loss_policy, loss_value]`. Returns None.

    NOTE(review): the nesting under the `hasattr` checks and variable scopes
    was reconstructed; confirm against the original layout.
    """
    from tensorpack.tfutils.common import get_global_step_var
    state, action, futurereward, advantage = inputs
    is_training = get_current_tower_context().is_training
    policy, value, dists = self._get_NN_prediction(state)
    if not hasattr(self, '_weights_train'):
        self._weights_train = self._weights_critic + self._weights_actor
    self.value = tf.squeeze(value, [1], name='value')  # (B,)
    self.policy = tf.identity(policy, name='policy')

    # Second copy of the network under the "Pred" scope.
    with tf.variable_scope("Pred") as vs:
        __p, __v, _ = self._get_NN_prediction(state)
        __v = tf.squeeze(__v, [1], name='value')  # (B,)
        __p = tf.identity(__p, name='policy')
        if not hasattr(self, '_weights_pred'):
            self._weights_pred = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=vs.name)
            assert (len(self._weights_train) == len(self._weights_pred))
            assert (not hasattr(self, '_sync_op'))
            self._sync_op = tf.group(*[d.assign(s + tf.truncated_normal(tf.shape(s), stddev=0.02)) for d, s in zip(self._weights_pred, self._weights_train)])

    # Third copy under 'pre'; its distribution is the denominator of the
    # probability ratio below.
    with tf.variable_scope('pre') as vs:
        pre_p,pre_v,pre_dists=self._get_NN_prediction(state)
        if not hasattr(self,'pre_weights'):
            self.pre_weights=tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,scope=vs.name)
            self._td_sync_op = tf.group(*[d.assign(s) for d, s in zip(self.pre_weights, self._weights_train)])

    if not is_training:
        return

    log_probs = dists.log_prob(action)

    # PPO policy clip loss: ratio, surr1, surr2
    pre_probs=pre_dists.log_prob(action)
    ratio=tf.exp(log_probs-pre_probs)
    # NOTE(review): prob_ratio is computed but not used by any active code.
    prob_ratio = tf.reduce_mean(input_tensor=tf.concat(values=ratio, axis=1), axis=1)
    # Clip range decays with the global step (x0.98 per 10000 steps).
    clip_param=tf.train.exponential_decay(CLIP_PARAMETER, get_global_step_var(), 10000, 0.98, name='clip_param')

    surr1=ratio*tf.expand_dims(advantage, -1)
    surr2=tf.clip_by_value(ratio,1.0-clip_param,1.0+clip_param)*tf.expand_dims(advantage, -1)
    loss_policy=-tf.reduce_mean(tf.minimum(surr1,surr2))

    # critic clip loss
    # NOTE(review): this uses the unsqueezed `value`, not `self.value` --
    # confirm it broadcasts against `futurereward` as intended.
    v_loss1=tf.square(value-futurereward)
    # NOTE(review): pre_value and v_loss2 are computed but unused; only
    # v_loss1 enters loss_value.
    pre_value=pre_v+tf.clip_by_value(value-pre_v,-clip_param,clip_param)
    v_loss2=tf.square(pre_v-futurereward)
    loss_value=0.5*tf.reduce_mean(v_loss1)

    entropy = dists.entropy()
    entropy_beta = tf.get_variable('entropy_beta', shape=[], initializer=tf.constant_initializer(0.01), trainable=False)
    exp_v = entropy_beta * entropy
    # Negated weighted entropy; note the op itself is named 'loss/policy'.
    loss_entropy = tf.reduce_mean(-tf.reduce_sum(exp_v, axis=-1), name='loss/policy')

    loss_policy=loss_policy+loss_entropy

    from tensorflow.contrib.layers.python.layers.regularizers import apply_regularization, l2_regularizer
    # L2 regularisation over the critic weights is added to the value loss.
    loss_l2_regularizer = apply_regularization(l2_regularizer(1e-4), self._weights_critic)
    loss_l2_regularizer = tf.identity(loss_l2_regularizer, 'loss/l2reg')
    loss_value += loss_l2_regularizer
    loss_value = tf.identity(loss_value, name='loss/value')

    self._cost = [loss_policy, loss_value ]
    from autodrive.trainer.summary import addParamSummary
    addParamSummary([('.*', ['rms', 'absmax'])])
    pred_reward = tf.reduce_mean(self.value, name='predict_reward')
    # From here on `advantage` holds its RMS, used for the summary only.
    advantage = symbf.rms(advantage, name='rms_advantage')
    summary.add_moving_summary(loss_policy, loss_value,
                               loss_entropy,
                               pred_reward, advantage,
                               loss_l2_regularizer,
                               tf.reduce_mean(self.policy[:, 0], name='action/steering/mean'),
                               tf.reduce_mean(self.policy[:, 1], name='action/accel/mean'),
                               )
def build_graph(self, image1, label1, image2, _):
    """Build the mixed-sample classification graph and return its cost.

    ``image1``/``label1`` are the labeled batch and ``image2`` is a second
    batch whose labels are replaced by the network's own softmax
    predictions; the fourth input is ignored.  During training the two
    batches are linearly interpolated with per-sample Beta(1, 1)
    coefficients, and a two-way domain classifier is trained on the
    gradient-reversed features to predict those coefficients.

    Returns the total cost (classification + domain alignment, plus weight
    decay when enabled), multiplied by ``self.loss_scale`` when that is
    not 1.
    """
    image1 = self.image_preprocess(image1)
    image2 = self.image_preprocess(image2)
    training = get_current_tower_context().is_training

    # Shuffle the unlabeled data within the batch.
    if training:
        image2 = tf.random_shuffle(image2)

    assert self.data_format in ['NCHW', 'NHWC']
    if self.data_format == 'NCHW':
        to_nchw = [0, 3, 1, 2]
        image1 = tf.transpose(image1, to_nchw)
        image2 = tf.transpose(image2, to_nchw)

    # Pseudo labels: softmax of the logits on the second batch.
    pseudo_logits, _ = self.get_logits(image2)
    pseudo_label = tf.nn.softmax(pseudo_logits)

    # Fraction of the schedule completed.  Change this if you modified the
    # training schedule or batch size: 60 epochs, batch size 256.
    schedule_steps = 60 * 1280000 / 256
    progress = tf.cast(get_global_step_var(), tf.float32) / schedule_steps

    # Sample one interpolation coefficient per sample.
    beta_dist = tf.distributions.Beta(1.0, 1.0)
    lmb = beta_dist.sample(tf.shape(image1)[0])
    lmb_for_image = tf.reshape(lmb, [-1, 1, 1, 1])
    lmb_for_label = tf.reshape(lmb, [-1, 1])

    # Interpolate images and labels (training only).
    label_ori = label1
    if training:
        weighted_first = tf.to_float(image1) * lmb_for_image
        weighted_second = tf.to_float(image2) * (1. - lmb_for_image)
        image = weighted_first + weighted_second

        weighted_onehot = tf.to_float(tf.one_hot(label1, 1000)) * lmb_for_label
        weighted_pseudo = tf.to_float(pseudo_label) * (1. - lmb_for_label)
        # The mixed target is treated as a constant.
        label = tf.stop_gradient(weighted_onehot + weighted_pseudo)
    else:
        image = image1
        label = tf.to_float(tf.one_hot(label1, 1000))

    # Features and logits for the interpolated samples, reusing the
    # variables created by the first get_logits call.
    with tf.variable_scope(tf.get_variable_scope(), reuse=True):
        logits, features = self.get_logits(image)

    # Classification loss and error.
    loss = ImageNetModel.compute_loss_and_error(
        logits, label,
        label_smoothing=self.label_smoothing,
        lmb=lmb,
        label_ori=label_ori)

    # Distribution alignment: the gradient-reversal weight is a sigmoid-shaped
    # function of training progress that starts at 0 and approaches 1.
    reversal_weight = 2. / (1. + tf.exp(-10. * progress)) - 1
    reversed_features = flip_gradient(features, reversal_weight)
    hidden1 = FullyConnected('linear_1', reversed_features, 1024, nl=tf.nn.relu)
    hidden2 = FullyConnected('linear_2', hidden1, 1024, nl=tf.nn.relu)
    domain_logits = FullyConnected("logits_dm", hidden2, 2)

    domain_first = tf.reshape(lmb, [-1, 1])
    domain_second = tf.reshape(1. - lmb, [-1, 1])
    label_dm = tf.concat([domain_first, domain_second], axis=1)
    domain_xent = tf.nn.softmax_cross_entropy_with_logits(
        labels=label_dm, logits=domain_logits)
    da_cost = tf.reduce_mean(domain_xent)

    # Final loss.
    loss += da_cost

    if self.weight_decay > 0:
        wd_loss = regularize_cost(
            self.weight_decay_pattern,
            tf.contrib.layers.l2_regularizer(self.weight_decay),
            name='l2_regularize_loss')
        add_moving_summary(loss, wd_loss)
        total_cost = tf.add_n([loss, wd_loss], name='cost')
    else:
        total_cost = tf.identity(loss, name='cost')
        add_moving_summary(total_cost)

    if self.loss_scale == 1.:
        return total_cost

    logger.info("Scaling the total loss by {} ...".format(self.loss_scale))
    return total_cost * self.loss_scale
def _setup_graph(self):
    """Create ``self.assign_op``, which writes ``self.global_step_val`` into the global step variable."""
    self.assign_op = get_global_step_var().assign(self.global_step_val)
def _get_NN_prediction(self, state):
    """Build the critic trunk and the Gaussian actor head on ``state``.

    The critic is eight ``fc_selu`` layers of width 200 followed by a
    one-unit dense value output.  The actor reads the critic's last hidden
    layer through ``tf.stop_gradient`` and produces a mean and a standard
    deviation for each of the two action components (steering, accel).

    On the first call the trainable variables of each scope are cached in
    ``self._weights_critic`` / ``self._weights_actor``.

    Returns:
        ``(actions, value, dists)``: one sample from the action
        distribution, the value output, and the ``Normal`` distribution.
    """
    from tensorpack.tfutils import symbolic_functions
    from autodrive.model.selu import fc_selu
    from tensorpack.tfutils.common import get_global_step_var
    from tensorflow.contrib.distributions import Normal

    training = get_current_tower_context().is_training

    hidden = state
    with tf.variable_scope('critic') as vs:
        for layer_idx in range(8):
            hidden = fc_selu(
                hidden, 200,
                # No dropout: we train on sensor input only, so no key
                # information may be lost.
                keep_prob=1.,
                is_training=training,
                name='fc-{}'.format(layer_idx))
        value = tf.layers.dense(
            hidden, 1, name='fc-value',
            kernel_initializer=tf.truncated_normal_initializer(stddev=0.1))
        if not hasattr(self, '_weights_critic'):
            self._weights_critic = tf.get_collection(
                tf.GraphKeys.TRAINABLE_VARIABLES, scope=vs.name)

    with tf.variable_scope('actor') as vs:
        # The actor does not back-propagate into the critic trunk.
        hidden = tf.stop_gradient(hidden)

        def unit_head(layer_name, activation):
            # One-unit dense head with a small truncated-normal init.
            return tf.layers.dense(
                hidden, 1, activation=activation, name=layer_name,
                kernel_initializer=tf.truncated_normal_initializer(stddev=0.01))

        mu_steering = 0.5 * unit_head('fc-mu-steering', tf.nn.tanh)
        mu_accel = unit_head('fc-mu-accel', tf.nn.tanh)
        mus = tf.concat([mu_steering, mu_accel], axis=-1)

        sigma_steering_ = 0.5 * unit_head('fc-sigma-steering', tf.nn.sigmoid)
        sigma_accel_ = 1. * unit_head('fc-sigma-accel', tf.nn.sigmoid)

        # Additive sigma offsets that decay with the global step.  Original
        # author's note: without such an offset convergence was slow and
        # unstable, presumably because (1) broad exploration early in
        # training helps avoid local optima, and (2) a too-small sigma early
        # on makes the Normal log_prob very large, causing oversized
        # gradient updates that deform the network from the start.
        step = get_global_step_var()
        sigma_beta_steering_exp = tf.train.exponential_decay(
            0.001, step, 1000, 0.5, name='sigma/beta/steering/exp')
        step = get_global_step_var()
        sigma_beta_accel_exp = tf.train.exponential_decay(
            0.5, step, 5000, 0.5, name='sigma/beta/accel/exp')

        sigma_steering = sigma_steering_ + sigma_beta_steering_exp
        sigma_accel = sigma_accel_ + sigma_beta_accel_exp
        sigmas = tf.concat([sigma_steering, sigma_accel], axis=-1)

        # The 1e-3 keeps the scale strictly positive.
        dists = Normal(mus, sigmas + 1e-3)
        # sample([1]) adds a leading axis of size 1; drop it again.
        actions = tf.squeeze(dists.sample([1]), [0])

        if training:
            tracked = [
                tf.reduce_mean(mu_steering, name='mu/steering/mean'),
                tf.reduce_mean(mu_accel, name='mu/accel/mean'),
                tf.reduce_mean(sigma_steering, name='sigma/steering/mean'),
                tf.reduce_max(sigma_steering, name='sigma/steering/max'),
                tf.reduce_mean(sigma_accel, name='sigma/accel/mean'),
                tf.reduce_max(sigma_accel, name='sigma/accel/max'),
                sigma_beta_accel_exp,
                sigma_beta_steering_exp,
            ]
            summary.add_moving_summary(*tracked)

        if not hasattr(self, '_weights_actor'):
            self._weights_actor = tf.get_collection(
                tf.GraphKeys.TRAINABLE_VARIABLES, scope=vs.name)

    return actions, value, dists
def _build_graph(self, inputs):
    """Build the actor-critic graph with a clipped-ratio (PPO-style) policy loss.

    Three copies of the network are created from ``_get_NN_prediction``:
    the training network (root scope), a copy under scope ``Pred`` and a
    copy under scope ``pre`` that supplies the old-policy log-probabilities
    for the probability ratio.

    Args:
        inputs: sequence unpacked as ``(state, action, futurereward,
            advantage)``.

    Side effects:
        Sets ``self.value``, ``self.policy``, ``self._weights_train``,
        ``self._weights_pred``, ``self._sync_op``, ``self.pre_weights`` and
        ``self._td_sync_op``.  When the tower is in training mode it also
        sets ``self._cost = [loss_policy, loss_value]`` and registers
        summaries.  Returns ``None``.
    """
    from tensorpack.tfutils.common import get_global_step_var
    from tensorflow.contrib.layers.python.layers.regularizers import (
        apply_regularization, l2_regularizer)
    from autodrive.trainer.summary import addParamSummary

    state, action, futurereward, advantage = inputs
    is_training = get_current_tower_context().is_training

    policy, value, dists = self._get_NN_prediction(state)
    if not hasattr(self, '_weights_train'):
        self._weights_train = self._weights_critic + self._weights_actor
    self.value = tf.squeeze(value, [1], name='value')  # (B,)
    self.policy = tf.identity(policy, name='policy')

    # Second copy of the network; its outputs are exposed as
    # 'Pred/policy' and 'Pred/value'.
    with tf.variable_scope("Pred") as vs:
        pred_policy, pred_value, _ = self._get_NN_prediction(state)
        pred_value = tf.squeeze(pred_value, [1], name='value')  # (B,)
        pred_policy = tf.identity(pred_policy, name='policy')
        if not hasattr(self, '_weights_pred'):
            self._weights_pred = tf.get_collection(
                tf.GraphKeys.TRAINABLE_VARIABLES, scope=vs.name)

    assert (len(self._weights_train) == len(self._weights_pred))
    assert (not hasattr(self, '_sync_op'))
    # Copies the training weights into the 'Pred' copy, perturbed by
    # truncated-normal noise (stddev 0.02).
    self._sync_op = tf.group(*[
        dst.assign(src + tf.truncated_normal(tf.shape(src), stddev=0.02))
        for dst, src in zip(self._weights_pred, self._weights_train)
    ])

    # Third copy: the "previous" policy used in the probability ratio.
    with tf.variable_scope('pre') as vs:
        _, _, pre_dists = self._get_NN_prediction(state)
        if not hasattr(self, 'pre_weights'):
            self.pre_weights = tf.get_collection(
                tf.GraphKeys.TRAINABLE_VARIABLES, scope=vs.name)
    self._td_sync_op = tf.group(*[
        dst.assign(src)
        for dst, src in zip(self.pre_weights, self._weights_train)
    ])

    if not is_training:
        return

    # --- Policy loss: clipped surrogate objective -------------------------
    log_probs = dists.log_prob(action)
    # The old policy is a fixed reference: block gradients so the ratio can
    # never push updates into the (trainable) 'pre' weights.
    pre_probs = tf.stop_gradient(pre_dists.log_prob(action))
    ratio = tf.exp(log_probs - pre_probs)
    clip_param = tf.train.exponential_decay(
        CLIP_PARAMETER, get_global_step_var(), 10000, 0.98, name='clip_param')

    # advantage gets a trailing axis so it broadcasts over the action
    # components of ratio.
    advantage_col = tf.expand_dims(advantage, -1)
    surr1 = ratio * advantage_col
    surr2 = tf.clip_by_value(ratio, 1.0 - clip_param, 1.0 + clip_param) * advantage_col
    loss_policy = -tf.reduce_mean(tf.minimum(surr1, surr2))

    # --- Critic loss ------------------------------------------------------
    # `value` still carries its trailing unit axis, so subtracting a rank-1
    # futurereward from it would broadcast to a (B, B) matrix.  Use the
    # squeezed self.value and flatten futurereward so the difference is
    # element-wise whichever of (B,) / (B, 1) the input uses.
    value_error = self.value - tf.reshape(futurereward, [-1])
    loss_value = 0.5 * tf.reduce_mean(tf.square(value_error))

    # --- Entropy bonus ----------------------------------------------------
    entropy = dists.entropy()
    entropy_beta = tf.get_variable(
        'entropy_beta', shape=[],
        initializer=tf.constant_initializer(0.01), trainable=False)
    exp_v = entropy_beta * entropy
    # NOTE(review): this entropy term is named 'loss/policy' while the
    # combined policy loss below gets an auto-generated name; the name is
    # kept as-is in case external code looks it up -- confirm and rename.
    loss_entropy = tf.reduce_mean(-tf.reduce_sum(exp_v, axis=-1), name='loss/policy')
    loss_policy = loss_policy + loss_entropy

    # --- L2 regularisation on the critic weights, folded into the value loss
    loss_l2_regularizer = apply_regularization(l2_regularizer(1e-4), self._weights_critic)
    loss_l2_regularizer = tf.identity(loss_l2_regularizer, 'loss/l2reg')
    loss_value += loss_l2_regularizer
    loss_value = tf.identity(loss_value, name='loss/value')

    self._cost = [loss_policy, loss_value]

    # --- Summaries --------------------------------------------------------
    addParamSummary([('.*', ['rms', 'absmax'])])
    pred_reward = tf.reduce_mean(self.value, name='predict_reward')
    rms_advantage = symbf.rms(advantage, name='rms_advantage')
    summary.add_moving_summary(
        loss_policy,
        loss_value,
        loss_entropy,
        pred_reward,
        rms_advantage,
        loss_l2_regularizer,
        tf.reduce_mean(self.policy[:, 0], name='action/steering/mean'),
        tf.reduce_mean(self.policy[:, 1], name='action/accel/mean'),
    )