Example #1
        def func(val):
            val_name = val.op.name
            if '/W' in val_name and 'conv1' not in val_name and 'fct' not in val_name:
                name_scope, device_scope = val_name.split('/W')

                with tf.variable_scope(name_scope, reuse=tf.AUTO_REUSE):
                    if eval(self.quantizer_config['W_opts']['fix_max']) == True:
                        max_x = tf.stop_gradient(
                            tf.get_variable('maxW', shape=(), initializer=tf.ones_initializer, dtype=tf.float32))
                        max_x *= float(self.quantizer_config['W_opts']['max_scale'])
                    else:
                        max_x = tf.stop_gradient(tf.reduce_max(tf.abs(val)))
                    mask = tf.get_variable('maskW', shape=val.shape, initializer=tf.zeros_initializer, dtype=tf.float32)

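                # gamma and ratio are assumed to come from the enclosing
                # scope; they are not defined in this snippet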
                probThreshold = (1 + gamma * get_global_step_var()) ** -1

                # Determine which filters shall be updated this iteration
                random_number = K.random_uniform(shape=(1, 1, 1, int(mask.shape[-1])))
                random_number1 = K.cast(random_number < probThreshold, dtype='float32')
                random_number2 = K.cast(random_number < (probThreshold * 0.1), dtype='float32')

                thresh = max_x * ratio * 0.999

                # Incorporate hysteresis into the threshold
                alpha = thresh
                beta = 1.2 * thresh

                # Update the significant weight mask by applying the threshold to the unmasked weights
                abs_kernel = K.abs(x=val)
                new_mask = mask - K.cast(abs_kernel < alpha, dtype='float32') * random_number1
                new_mask = new_mask + K.cast(abs_kernel > beta, dtype='float32') * random_number2
                new_mask = K.clip(x=new_mask, min_value=0., max_value=1.)
                return tf.assign(mask, new_mask, use_locking=False).op
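
The op returned above only rewrites maskW; the mask still has to be multiplied into the weights on the forward pass. A minimal TF1.x sketch of that step, with illustrative names and shapes (not taken from the original project):

    import tensorflow as tf

    with tf.variable_scope('conv2', reuse=tf.AUTO_REUSE):
        W = tf.get_variable('W', shape=(3, 3, 16, 32), dtype=tf.float32)
        mask = tf.get_variable('maskW', shape=W.shape, trainable=False,
                               initializer=tf.zeros_initializer(), dtype=tf.float32)
    # positions where mask == 0 contribute nothing to the convolution
    masked_W = W * tf.stop_gradient(mask)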
Example #2
def _get_lr_variable(options):
    assert options.init_lr > 0, options.init_lr
    init_lr = options.init_lr
    lr_decay_method = options.lr_decay_method
    name = 'learning_rate'
    if lr_decay_method is None or lr_decay_method == 'human':
        lr = tf.get_variable(name, initializer=float(init_lr), trainable=False)

    global_step = get_global_step_var()
    assert options.steps_per_epoch, options.steps_per_epoch
    if lr_decay_method == 'cosine':
        assert options.max_epoch, options.max_epoch
        decay_steps = int(options.steps_per_epoch * options.max_epoch) + 1
        lr = tf.train.cosine_decay(init_lr,
                                   global_step,
                                   decay_steps=decay_steps,
                                   name=name)

    elif lr_decay_method == 'exponential':
        assert options.lr_decay_every, options.lr_decay_every
        decay_steps = int(options.steps_per_epoch * options.lr_decay_every)
        lr = tf.train.exponential_decay(init_lr,
                                        global_step,
                                        decay_steps=decay_steps,
                                        decay_rate=options.lr_decay,
                                        staircase=True,
                                        name=name)

    tf.summary.scalar(name + '-summary', lr)
    return lr
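
A usage sketch: the options object below is a stand-in whose field names follow the asserts above; it is an assumption for illustration, not part of the original project.

    import argparse

    options = argparse.Namespace(
        init_lr=0.1,
        lr_decay_method='cosine',
        steps_per_epoch=500,
        max_epoch=300,
        lr_decay_every=None,
        lr_decay=None)
    lr = _get_lr_variable(options)  # scalar tensor named 'learning_rate'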
Example #3
    def _setup_graph(self):
        '''
        Configure tf.contrib.model_pruning hyper-parameters and build the
        conditional mask update op.
        '''
        default_dict = {
            'name': 'model_pruning',
            'begin_pruning_step': 0,
            'end_pruning_step': 34400,
            'target_sparsity': 0.31,
            'pruning_frequency': 344,
            'sparsity_function_begin_step': 0,
            'sparsity_function_end_step': 34400,
            'sparsity_function_exponent': 2,
        }
        for k, v in self.param_dict.items():
            if k in default_dict:
                default_dict[k] = v

        param_list = ['{}={}'.format(k, v) for k, v in default_dict.items()]
        # param_list = [
        #         "name=cifar10_pruning",
        #         "begin_pruning_step=1000",
        #         "end_pruning_step=20000",
        #         "target_sparsity=0.9",
        #         "sparsity_function_begin_step=1000",
        #         "sparsity_function_end_step=20000"
        # ]

        PRUNE_HPARAMS = ",".join(param_list)
        pruning_hparams = pruning.get_pruning_hparams().parse(PRUNE_HPARAMS)
        self.p = pruning.Pruning(pruning_hparams,
                                 global_step=get_global_step_var())
        self.p.add_pruning_summaries()
        self.mask_update_op = self.p.conditional_mask_update_op()
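
The update op is presumably driven from the same tensorpack Callback; a sketch using the _trigger_step hook and self.trainer.sess (tensorpack conventions, assumed here):

    def _trigger_step(self):
        # apply the conditional mask update after each training step
        self.trainer.sess.run(self.mask_update_op)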
Example #4
    def _addMovingSummary(self, v, *args, **kwargs):
        """
        Args:
            v (tf.Tensor or list): tensor or list of tensors to summary. Must have
                scalar type.
            args: tensors to summary (support positional arguments)
            decay (float): the decay rate. Defaults to 0.95.
            collection (str): the name of the collection to add EMA-maintaining ops.
                The default will work together with the default
                :class:`MovingAverageSummary` callback.
        """
        from tensorpack.tfutils.summary import add_moving_summary, MOVING_SUMMARY_OPS_KEY
        from tensorpack.tfutils.tower import get_current_tower_context
        from tensorpack.tfutils.common import get_global_step_var
        import re
        import tensorflow as tf
        decay = kwargs.pop('decay', 0.95)
        collection = MOVING_SUMMARY_OPS_KEY
        summary_collection = None
        global _current_nn_context
        if _current_nn_context and _current_nn_context.summary_collection is False:
            return

        if _current_nn_context and _current_nn_context.summary_collection:
            summary_collection = [_current_nn_context.summary_collection]
            collection = _current_nn_context.summary_collection + '-ema_op'
        elif 'collection' in kwargs:
            collection = kwargs.pop('collection')

            assert len(kwargs) == 0, "Unknown arguments: " + str(kwargs)

        if not isinstance(v, list):
            v = [v]
        v.extend(args)
        for x in v:
            assert (isinstance(x, tf.Tensor) or isinstance(x, tf.Variable)), x
            assert x.get_shape().ndims == 0, x.get_shape()
        # TODO will produce tower0/xxx?
        # TODO use zero_debias
        with tf.name_scope(None):
            averager = tf.train.ExponentialMovingAverage(
                decay, num_updates=get_global_step_var(), name='EMA')
            avg_maintain_op = averager.apply(v)

            for c in v:
                # TODO do this in the EMA callback?
                name = re.sub('tower[pe0-9]+/', '', c.op.name)
                tf.summary.scalar(name + '-summary',
                                  averager.average(c),
                                  collections=summary_collection)

        tf.add_to_collection(collection, avg_maintain_op)
        return averager, avg_maintain_op
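
Stripped of the tensorpack plumbing, the core pattern the wrapper automates looks roughly like this standalone sketch (not the project's code):

    import tensorflow as tf

    metric = tf.placeholder(tf.float32, shape=(), name='metric')
    step = tf.train.get_or_create_global_step()
    averager = tf.train.ExponentialMovingAverage(0.95, num_updates=step, name='EMA')
    ema_op = averager.apply([metric])  # op that refreshes the moving average
    tf.summary.scalar('metric-summary', averager.average(metric))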
Example #5
def _apply_drop_path(
        x, drop_path_keep_prob, curr_depth, total_depth, max_train_steps):
    layer_ratio = float(curr_depth + 1) / total_depth
    drop_path_keep_prob = 1.0 - layer_ratio * (1.0 - drop_path_keep_prob)

    curr_step = tf.to_float(get_global_step_var() + 1)
    step_ratio = curr_step / tf.to_float(max_train_steps)
    step_ratio = tf.minimum(1.0, step_ratio)
    drop_path_keep_prob = 1.0 - step_ratio * (1.0 - drop_path_keep_prob)
    #with tf.device('/cpu:0'):
    #    tf.summary.scalar('layer_ratio', layer_ratio)
    #    tf.summary.scalar('step_ratio', step_ratio)
    x = _drop_path(x, drop_path_keep_prob)
    return x
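
The _drop_path helper is not shown in this snippet; a common NASNet-style implementation, assumed equivalent, drops the whole path per example and rescales the survivors:

    import tensorflow as tf

    def _drop_path(x, keep_prob):
        # keep the whole path with probability keep_prob, per example
        batch_size = tf.shape(x)[0]
        noise_shape = [batch_size, 1, 1, 1]
        random_tensor = keep_prob + tf.random_uniform(noise_shape, dtype=x.dtype)
        binary_tensor = tf.floor(random_tensor)  # 0.0 or 1.0 per example
        return x / keep_prob * binary_tensor     # rescale so the expectation is unchanged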
Example #6
 def update_init_state(self, verbose=False):
     update_state_ops = []
     for k in range(self.num_lstms):
         _cell_updates = self.basic_cells[k].get_update_ops(
             self.state[k], self.last_state[k])
         update_state_ops.extend(_cell_updates)
     if verbose:
         with tf.control_dependencies(update_state_ops):
             vals = [
                 get_global_step_var(),
                 tf.reduce_mean(self.state[0]),
                 tf.reduce_mean(self.last_state[0]),
             ]
             update_state_ops.append(tf.Print(vals[-1], vals))
     return tf.group(*update_state_ops, name='set_init_state')
Example #7
 def update_state(self, dependencies=[], verbose=False, name=None):
     """
     Update op for shifting states.
     """
     with tf.control_dependencies(dependencies):
         update_state_ops = []
         for k in range(self.num_lstms):
             _cell_updates = self.basic_cells[k].get_update_ops(
                 self.state[k], self.last_state[k])
             update_state_ops.extend(_cell_updates)
         if verbose:
             c = get_global_step_var()
             update_state_ops.append(tf.Print(c, [c]))
         if name is None:
             name = 'update_state'
         return tf.group(*update_state_ops, name=name)
Example #8
    def _setup_graph(self) -> None:
        if self.evaluator:
            self.evaluator.set_up_graph(self.trainer)

        # Fetch the requested metrics, along with the global step for debugging.
        fetches = (
            {n: self.get_tensor(n)
             for n in self.metric_names},
            tf.train.get_or_create_global_step(),
        )
        self._fetch = tf.train.SessionRunArgs(fetches=fetches)

        # Set up model saving logic (taken from tp.callbacks.ModelSaver).
        self.saver = tf.train.Saver(max_to_keep=None,
                                    write_version=tf.train.SaverDef.V2,
                                    save_relative_paths=True)
        tf.add_to_collection(tf.GraphKeys.SAVERS, self.saver)

        with tf.name_scope(None):
            self.gs_val = tf.placeholder(tf.int64, shape=())
            self.gs_set_op = tf.assign(get_global_step_var(),
                                       self.gs_val,
                                       name="DET_SET_GLOBAL_STEP").op
Example #9
    def build_graph(self, *inputs):
        # Dynamic weighting for multiple predictions
        if self.options.ls_method == ADALOSS_LS_METHOD:
            dynamic_weights = tf.get_variable(
                DYNAMIC_WEIGHTS_NAME, (self.n_aux_preds,),
                tf.float32, trainable=False,
                initializer=tf.constant_initializer([1.0]*self.n_aux_preds))
            for i in range(self.n_aux_preds):
                weight_i = tf.identity(
                    dynamic_weights[i], 'weight_{:02d}'.format(i))
                add_moving_summary(weight_i)

        with argscope(
                    [
                        Conv2D, Deconv2D, GroupedConv2D, AvgPooling,
                        MaxPooling, BatchNorm, GlobalAvgPooling,
                        ResizeImages, SeparableConv2D
                    ],
                    data_format=self.data_format
                ), \
                argscope(
                    [Conv2D, Deconv2D, GroupedConv2D, SeparableConv2D],
                    activation=tf.identity,
                    use_bias=self.options.use_bias
                ), \
                argscope(
                    [BatchNorm],
                    momentum=float(self.options.batch_norm_decay),
                    epsilon=float(self.options.batch_norm_epsilon)
                ), \
                argscope(
                    [candidate_gated_layer],
                    eps=self.options.candidate_gate_eps
                ):

            # regularization initialization
            if self.options.regularize_coef == 'const':
                wd_w = self.options.regularize_const
            elif self.options.regularize_coef == 'decay':
                wd_w = tf.train.exponential_decay(0.0002, get_global_step_var(),
                                                  480000, 0.2, True)


            # Network-level objects / information
            n_inputs = self.master.num_inputs()
            drop_path_func = DropPath(
                drop_path_keep_prob=self.options.drop_path_keep_prob,
                max_train_steps=self.options.max_train_steps,
                total_depth=self.n_layers - n_inputs)

            l_hallu_costs = []
            # cell dictionary
            self.op_to_cell = dict()
            for cname in self.net_info.cell_names:
                hid_to_fs_params = _init_feature_select(
                    self.net_info[cname], cname, self.options.feat_sel_lambda)
                if cname == 'master':
                    # since master has additional duties (down_sampling,
                    # aux prediction, accumulating hallu stats, etc.),
                    # master is not built from a cell
                    master_hid_to_fs_params = hid_to_fs_params
                    hallu_record = _init_hallu_record(self.compute_hallu_stats)
                    continue

                self.op_to_cell[cname] = PetridishBaseCell(
                    self.net_info[cname],
                    self.data_format,
                    self.compute_hallu_stats,
                    drop_path_func=drop_path_func,
                    hid_to_fs_params=hid_to_fs_params,
                    l_hallu_costs=l_hallu_costs)

            l_layers = [None] * self.n_layers
            layer_dict = dict()
            out_filters = self.out_filters

            # on-GPU(device) preprocessing for mean/var, casting, embedding, init conv
            layer, label = self._preprocess_data(inputs)

            for layer_idx in range(n_inputs):
                info = self.master[layer_idx]
                layer_dict[info.id] = layer #if layer_idx + 1 == n_inputs else None

            for layer_idx in range(n_inputs, self.n_layers):
                info = self.master[layer_idx]
                layer_id_str = "layer{:03d}".format(info.id)
                strides = 1
                if info.down_sampling:
                    out_filters *= 2
                    strides = 2
                # preprocess all inputs to match the most recent layer
                # in h/w and out_filters in ch_dim
                #if not self.is_cell_based:
                with tf.variable_scope('pre_'+layer_id_str):
                    orig_dict = dict()
                    for input_id in info.inputs:
                        in_l = layer_dict[input_id]
                        orig_dict[input_id] = in_l
                        layer_dict[input_id] = _reduce_prev_layer(
                            in_l, input_id, layer, out_filters,
                            self.data_format, hw_only=False)
                layer = construct_layer(
                    layer_id_str, layer_dict, info, out_filters, strides,
                    self.data_format, info.stop_gradient,
                    op_to_cell=self.op_to_cell,
                    drop_path_func=drop_path_func,
                    non_input_layer_idx=layer_idx - n_inputs,
                    hid_to_fs_params=master_hid_to_fs_params,
                    l_hallu_costs=l_hallu_costs
                )

                # store info for future compute
                layer_dict[info.id] = layer
                l_layers[layer_idx] = layer
                hallu_record = _update_hallu_record(
                    self.compute_hallu_stats,
                    hallu_record, layer_idx, self.master, layer_dict)
                #if not self.is_cell_based and self.options.use_local_reduction:
                if self.options.use_local_reduction:
                    # reset the reduction layers in dict. So each layer
                    # uses its own reduction
                    for input_id in orig_dict:
                        layer_dict[input_id] = orig_dict[input_id]
            # end for layer wise feature construction.

            # build aux predictions
            total_cost = 0.0
            wd_cost = 0.0
            anytime_idx = -1
            for layer_idx, layer in enumerate(l_layers):
                # aux prediction
                info = self.master[layer_idx]
                cost_weight = info.aux_weight

                if cost_weight > 0:
                    anytime_idx += 1

                    scope_name = scope_prediction(info.id)
                    cost, variables = feature_to_prediction_and_loss(
                        scope_name, layer, label,
                        self.num_classes, self.prediction_feature,
                        ch_dim=self.ch_dim,
                        label_smoothing=self.options.label_smoothing,
                        dense_dropout_keep_prob=self.options.dense_dropout_keep_prob,
                        is_last=(layer_idx + 1 == len(l_layers)))

                    # record the cost for the use of online learners.
                    cost_i = tf.identity(cost, name='anytime_cost_{:02d}'.format(anytime_idx))

                    # decide whether to use static or dynamic weights
                    if self.options.ls_method == ADALOSS_LS_METHOD:
                        cost_weight = dynamic_weights[anytime_idx]
                    total_cost += cost_weight * cost_i

                    # regularize variable in linear predictors
                    # (have to do this separately here because
                    # we need unregularized losses for cost_weights)
                    for var in variables:
                        wd_cost += cost_weight * wd_w * tf.nn.l2_loss(var)
                # end if aux_weight > 0
            # end for each layer

            # regularization, cost
            if self.params_to_regularize is not None:
                wd_cost += wd_w * regularize_cost(self.params_to_regularize, tf.nn.l2_loss)
            wd_cost = tf.identity(wd_cost, name='wd_cost')
            total_cost = tf.identity(total_cost, name='sum_losses')
            add_moving_summary(total_cost, wd_cost)
            if l_hallu_costs:
                hallu_total_cost = tf.add_n(l_hallu_costs, name='hallu_total_cost')
                add_moving_summary(hallu_total_cost)
                self.cost = tf.add_n([total_cost, wd_cost, hallu_total_cost], name='cost')
            else:
                self.cost = tf.add_n([total_cost, wd_cost], name='cost')

            # hallu stats
            for cname in self.net_info.cell_names:
                if cname == 'master':
                    _hallu_stats_graph(
                        self.compute_hallu_stats, hallu_record, self.cost, scope=cname)
                    continue
                cell = self.op_to_cell.get(cname, None)
                cell_hallu_record = getattr(cell, 'hallu_record', None)
                _hallu_stats_graph_merged(
                    self.compute_hallu_stats, cell_hallu_record,
                    self.cost, scope=cname, n_calls=cell.n_calls,
                    layer_info_list=cell.layer_info_list)
            return self.cost
Example #10
    def _build_graph(self, inputs):

        # keep track of statistics to interpolate between different checkpoints
        # from 0 (only old checkpoint) to 1 (only new network)
        glbstep = tf.identity(get_global_step_var(), name="glob_step")
        seen_images = tf.cast(get_global_step_var() * BATCH_SIZE,
                              tf.int64,
                              name='seen_images')
        if TRANSITION:
            alpha = tf.divide(tf.cast(seen_images, tf.float32),
                              tf.cast(NUM_IMAGES, tf.float32),
                              name='alpha')
            transition_phase = tf.get_variable('transition_phase',
                                               initializer=1.,
                                               trainable=False,
                                               dtype=tf.float32)
        else:
            alpha = tf.identity(0., name='alpha')
            transition_phase = tf.get_variable('transition_phase',
                                               initializer=0.,
                                               trainable=False,
                                               dtype=tf.float32)
        add_moving_summary(alpha, seen_images,
                           tf.identity(transition_phase, name="transition"),
                           glbstep)

        if TRANSITION:
            real_img, real_prev = inputs[0] / 128.0 - 1, Upsample(
                "upsample_realprev", inputs[1] / 128.0 - 1, factor=2)
            real_img = combine_img(real_img, real_prev, alpha)
        else:
            real_prev = None
            real_img = inputs[0] / 128.0 - 1

        # noise which the generator is starting from
        z = tf.random_uniform([BATCH_SIZE, NOISE_DIM], -1, 1, name='z_train')
        z = tf.placeholder_with_default(z, [None, NOISE_DIM], name='z')

        # GENERATOR
        # ---------------------------------------------------------------------
        with tf.variable_scope('gen'):
            fake_img = self.generator(z, alpha=alpha)
            visualize_images('real_fake', real_img, fake_img)

            fake_output = (fake_img + 1.) * 128.
            fake_output = tf.cast(tf.clip_by_value(fake_output, 0, 255),
                                  tf.uint8,
                                  name='viz')
            tf.identity(fake_output, name='fake_img')

        # DISCRIMINATOR
        # ---------------------------------------------------------------------
        with tf.variable_scope('discrim'):
            WGAN_alpha = tf.random_uniform(shape=[BATCH_SIZE, 1, 1, 1],
                                           minval=0.,
                                           maxval=1.,
                                           name='alpha')
            interp_img = real_img + WGAN_alpha * (fake_img - real_img)

            visualize_images('real_fake_interp', real_img, fake_img,
                             interp_img)

            real_score = self.discriminator(real_img, alpha=alpha)
            fake_score = self.discriminator(fake_img, alpha=alpha)
            interp_score = self.discriminator(interp_img, alpha=alpha)

            mean_real_score = tf.reduce_mean(real_score,
                                             name='mean_real_score')
            mean_fake_score = tf.reduce_mean(fake_score,
                                             name='mean_fake_score')
            mean_interp_score = tf.reduce_mean(interp_score,
                                               name='mean_interp_score')
            add_moving_summary(mean_real_score, mean_fake_score,
                               mean_interp_score)

        # the Wasserstein-GAN losses
        self.d_loss = tf.reduce_mean(fake_score - real_score, name='d_loss')
        self.g_loss = tf.negative(tf.reduce_mean(fake_score), name='g_loss')
        loss_diff = tf.subtract(self.g_loss, self.d_loss, name="loss-diff-g-d")
        add_moving_summary(self.d_loss, self.g_loss, loss_diff)

        # the gradient penalty loss
        def wasserstein_grad_penalty(score, input, name=None):
            with tf.name_scope(name):
                gradients = tf.gradients(score, [input])[0]
                gradients = tf.sqrt(
                    tf.reduce_sum(tf.square(gradients), [1, 2, 3]))
                gradients_rms = symbolic_functions.rms(gradients,
                                                       'gradient_rms')
                gradient_penalty = tf.reduce_mean(tf.square(gradients - 1),
                                                  name='gradient_penalty')
                return gradients_rms, gradient_penalty

        gradients_rms, gradient_penalty = wasserstein_grad_penalty(
            interp_score, interp_img)
        add_moving_summary(gradient_penalty, gradients_rms)

        # drift-loss
        drift_loss = tf.reduce_mean(tf.square(real_score), name='drift_loss')
        self.d_loss = tf.add_n(
            [self.d_loss, 10 * gradient_penalty, EPS_DRIFT * drift_loss],
            name='total_d_loss')
        add_moving_summary(self.d_loss, drift_loss)

        self.collect_variables()

        def count_params_in_scope(scope):
            vs = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope)
            return np.sum([int(np.prod(v.shape)) for v in vs])

        logger.info(colored("Number of Parameters:", 'cyan'))
        logger.info("generator #params:     {:,}".format(
            count_params_in_scope('gen')))
        logger.info("discriminator #params: {:,}".format(
            count_params_in_scope('discrim')))
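
combine_img is not defined in this snippet; for a progressive-GAN transition it is presumably the standard fade-in blend, sketched here as an assumed equivalent:

    def combine_img(img, img_prev, alpha):
        # fade from the upsampled lower-resolution image (alpha = 0)
        # to the full new-resolution image (alpha = 1)
        return alpha * img + (1.0 - alpha) * img_prev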
Example #11
    def _build_ad_nn(self, tensor_io):
        from drlutils.dataflow.tensor_io import TensorIO
        assert (isinstance(tensor_io, TensorIO))
        from drlutils.model.base import get_current_nn_context
        from tensorpack.tfutils.common import get_global_step_var
        global_step = get_global_step_var()
        nnc = get_current_nn_context()
        is_training = nnc.is_training
        i_state = tensor_io.getInputTensor('state')
        i_agentIdent = tensor_io.getInputTensor('agentIdent')
        i_sequenceLength = tensor_io.getInputTensor('sequenceLength')
        i_resetRNN = tensor_io.getInputTensor('resetRNN')
        l = i_state
        # l = tf.Print(l, [i_state, tf.shape(i_state)], 'State = ')
        # l = tf.Print(l, [i_agentIdent, tf.shape(i_agentIdent)], 'agentIdent = ')
        # l = tf.Print(l, [i_sequenceLength, tf.shape(i_sequenceLength)], 'SeqLen = ')
        # l = tf.Print(l, [i_resetRNN, tf.shape(i_resetRNN)], 'resetRNN = ')
        with tf.variable_scope('critic', reuse=nnc.reuse) as vs:

            def _get_cell():
                cell = tf.nn.rnn_cell.BasicLSTMCell(256)
                # if is_training:
                #     cell = tf.nn.rnn_cell.DropoutWrapper(cell, output_keep_prob=0.9)
                return cell

            cell = tf.nn.rnn_cell.MultiRNNCell([_get_cell() for _ in range(1)])
            rnn_outputs = self._buildRNN(
                l,
                cell,
                tensor_io.batchSize,
                i_agentIdent=i_agentIdent,
                i_sequenceLength=i_sequenceLength,
                i_resetRNN=i_resetRNN,
            )
            rnn_outputs = tf.reshape(
                rnn_outputs, [-1, rnn_outputs.get_shape().as_list()[-1]])
            l = rnn_outputs
            from ad_cur.autodrive.model.selu import fc_selu
            for lidx in range(2):
                l = fc_selu(
                    l,
                    200,
                    keep_prob=1.,  # we train only on sensor input, so critical information must not be dropped
                    is_training=is_training,
                    name='fc-{}'.format(lidx))
            value = tf.layers.dense(l, 1, name='fc-value')
            value = tf.squeeze(value, [1], name="value")
            if not hasattr(self, '_weights_critic'):
                self._weights_critic = tf.get_collection(
                    tf.GraphKeys.TRAINABLE_VARIABLES, scope=vs.name)

        with tf.variable_scope('actor', reuse=nnc.reuse) as vs:
            l = tf.stop_gradient(l)
            l = tf.layers.dense(l,
                                128,
                                activation=tf.nn.relu6,
                                name='fc-actor')
            mu_steering = 0.5 * tf.layers.dense(
                l, 1, activation=tf.nn.tanh, name='fc-mu-steering')
            mu_accel = tf.layers.dense(l,
                                       1,
                                       activation=tf.nn.tanh,
                                       name='fc-mu-accel')
            mus = tf.concat([mu_steering, mu_accel], axis=-1)

            # mus = tf.layers.dense(l, 2, activation=tf.nn.tanh, name='fc-mus')
            # sigmas = tf.layers.dense(l, 2, activation=tf.nn.softplus, name='fc-sigmas')
            # sigmas = tf.clip_by_value(sigmas, -0.001, 0.5)
            def saturating_sigmoid(x):
                """Saturating sigmoid: 1.2 * sigmoid(x) - 0.1 cut to [0, 1]."""
                with tf.name_scope("saturating_sigmoid", [x]):
                    y = tf.sigmoid(x)
                    return tf.minimum(1.0, tf.maximum(0.0, 1.2 * y - 0.1))

            sigma_steering_ = 0.1 * tf.layers.dense(
                l, 1, activation=tf.nn.sigmoid, name='fc-sigma-steering')
            sigma_accel_ = 0.25 * tf.layers.dense(
                l, 1, activation=tf.nn.sigmoid, name='fc-sigma-accel')

            if not nnc.is_evaluating:
                sigma_beta_steering = tf.get_default_graph(
                ).get_tensor_by_name('actor/sigma_beta_steering:0')
                sigma_beta_accel = tf.get_default_graph().get_tensor_by_name(
                    'actor/sigma_beta_accel:0')
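                # note: this constant overrides the sigma_beta_steering tensor fetched above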
                sigma_beta_steering = tf.constant(1e-4)
                # sigma_beta_steering_exp = tf.train.exponential_decay(0.3, global_step, 1000, 0.5, name='sigma/beta/steering/exp')
                # sigma_beta_accel_exp = tf.train.exponential_decay(0.5, global_step, 5000, 0.5, name='sigma/beta/accel/exp')
            else:
                sigma_beta_steering = tf.constant(1e-4)
                sigma_beta_accel = tf.constant(1e-4)
            sigma_steering = (sigma_steering_ + sigma_beta_steering)
            sigma_accel = (sigma_accel_ + sigma_beta_accel)

            sigmas = tf.concat([sigma_steering, sigma_accel], axis=-1)
            # if is_training:
            #     pass
            #     # without sigma_beta, convergence is slow and unstable; likely because:
            #     #   1. plenty of exploration early in training keeps the network out of local optima
            #     #   2. a too-small sigma early on makes normal_dist's log_prob very large, so the
            #     #      gradient updates blow up and the network is crippled from the start, hard to recover
            #
            # if is_training:
            #     sigmas += sigma_beta_steering
            # sigma_steering = tf.clip_by_value(sigma_steering, sigma_beta_steering, 0.5)
            # sigma_accel = tf.clip_by_value(sigma_accel, sigma_beta_accel, 0.5)
            # sigmas = tf.clip_by_value(sigmas, 0.1, 0.5)
            # sigmas_orig = sigmas
            # sigmas = sigmas + sigma_beta_steering
            # sigmas = tf.minimum(sigmas + 0.1, 100)
            # sigmas = tf.clip_by_value(sigmas, sigma_beta_steering, 1)
            # sigma_steering += sigma_beta_steering
            # sigma_accel += sigma_beta_accel

            # mus = tf.concat([mu_steering, mu_accel], axis=-1)

            from tensorflow.contrib.distributions import Normal
            dists = Normal(mus, sigmas + 0.01)
            policy = tf.squeeze(dists.sample([1]), [0])
            # clip to within two standard deviations
            policy = tf.clip_by_value(policy, mus - 2 * sigmas,
                                      mus + 2 * sigmas)
            if is_training:
                self._addMovingSummary(
                    tf.reduce_mean(mu_steering, name='mu/steering/mean'),
                    tf.reduce_mean(mu_accel, name='mu/accel/mean'),
                    tf.reduce_mean(sigma_steering, name='sigma/steering/mean'),
                    tf.reduce_max(sigma_steering, name='sigma/steering/max'),
                    tf.reduce_mean(sigma_accel, name='sigma/accel/mean'),
                    tf.reduce_max(sigma_accel, name='sigma/accel/max'),
                    # sigma_beta_accel,
                    # sigma_beta_steering,
                )
            # actions = tf.Print(actions, [mus, sigmas, tf.concat([sigma_steering_, sigma_accel_], -1), actions],
            #                    'mu/sigma/sigma.orig/act=', summarize=4)
            if not hasattr(self, '_weights_actor'):
                self._weights_actor = tf.get_collection(
                    tf.GraphKeys.TRAINABLE_VARIABLES, scope=vs.name)
        if not is_training:
            tensor_io.setOutputTensors(policy, value, mus, sigmas)
            return

        i_actions = tensor_io.getInputTensor("action")
        # i_actions = tf.Print(i_actions, [i_actions], 'actions = ')
        i_actions = tf.reshape(i_actions,
                               [-1] + i_actions.get_shape().as_list()[2:])
        log_probs = dists.log_prob(i_actions)
        # exp_v = tf.transpose(
        #     tf.multiply(tf.transpose(log_probs), advantage))
        # exp_v = tf.multiply(log_probs, advantage)
        i_advantage = tensor_io.getInputTensor("advantage")
        i_advantage = tf.reshape(i_advantage,
                                 [-1] + i_advantage.get_shape().as_list()[2:])
        exp_v = log_probs * tf.expand_dims(i_advantage, -1)
        entropy = dists.entropy()
        entropy_beta = tf.get_variable(
            'entropy_beta',
            shape=[],
            initializer=tf.constant_initializer(0.01),
            trainable=False)
        exp_v = entropy_beta * entropy + exp_v
        loss_policy = tf.reduce_mean(-tf.reduce_sum(exp_v, axis=-1),
                                     name='loss/policy')

        i_futurereward = tensor_io.getInputTensor("futurereward")
        i_futurereward = tf.reshape(i_futurereward, [-1] +
                                    i_futurereward.get_shape().as_list()[2:])
        loss_value = tf.reduce_mean(0.5 * tf.square(value - i_futurereward))

        loss_entropy = tf.reduce_mean(tf.reduce_sum(entropy, axis=-1),
                                      name='xentropy_loss')

        from tensorflow.contrib.layers.python.layers.regularizers import apply_regularization, l2_regularizer
        loss_l2_regularizer = apply_regularization(l2_regularizer(1e-4),
                                                   self._weights_critic)
        loss_l2_regularizer = tf.identity(loss_l2_regularizer, 'loss/l2reg')
        loss_value += loss_l2_regularizer
        loss_value = tf.identity(loss_value, name='loss/value')

        # self.cost = tf.add_n([loss_policy, loss_value * 0.1, loss_l2_regularizer])

        self._addParamSummary([('.*', ['rms', 'absmax'])])
        pred_reward = tf.reduce_mean(value, name='predict_reward')
        import tensorpack.tfutils.symbolic_functions as symbf
        advantage = symbf.rms(i_advantage, name='rms_advantage')
        self._addMovingSummary(
            loss_policy,
            loss_value,
            loss_entropy,
            pred_reward,
            advantage,
            loss_l2_regularizer,
            tf.reduce_mean(policy[:, 0], name='actor/steering/mean'),
            tf.reduce_mean(policy[:, 1], name='actor/accel/mean'),
        )
        return loss_policy, loss_value
Example #12
File: invert.py Project: KryoEM/tfmodels

if __name__ == '__main__':

    parser = argparse.ArgumentParser()
    parser.add_argument('--logdir', help='logdir', default='')
    args = parser.parse_args()

    # P_py  = np.load('/jasper/models/gp140/P_py.npy')
    Ppy = np.load('/jasper/models/BetaGal/betagal1.5_projections.npy')

    Ppy = Ppy[0]  # leave only first symmetric unit
    vlen, nviews = Ppy.shape[-1], Ppy.shape[0]

    os.environ['CUDA_VISIBLE_DEVICES'] = get_visible_device_list(3)
    global_step = get_global_step_var()

    # set logger directory for checkpoints, etc
    logger.set_logger_dir(args.logdir, action='k')

    steps_per_epoch = cfg.EPOCH_STEPS
    model = Model(vlen, nviews)
    # config.gpu_options.allow_growth = True
    traincfg = TrainConfig(
        model=model,
        data=QueueInput(ProjDataFlow(Ppy)),
        callbacks=[
            PeriodicTrigger(ModelSaver(), every_k_epochs=5),
            PeriodicTrigger(VolumeSaver(model), every_k_epochs=5),
            # prevent learning in the first epoch
            # MemInitHyperParamSetter('learning_rate_mask',(0,1)),
Example #13
File: main.py Project: waxz/ppo_torcs
    def _get_NN_prediction(self, state):
        from tensorpack.tfutils import symbolic_functions
        ctx = get_current_tower_context()
        is_training = ctx.is_training
        l = state
        # l = tf.Print(l, [state], 'State = ')
        with tf.variable_scope('critic') as vs:

            from autodrive.model.selu import fc_selu
            for lidx in range(8):
                l = fc_selu(l, 200,
                            keep_prob=1., # we train only on sensor input, so critical information must not be dropped
                            is_training=is_training, name='fc-{}'.format(lidx))
            # l = tf.layers.dense(l, 512, activation=tf.nn.relu, name='fc-dense')
            # for lidx, hidden_size in enumerate([300, 600]):
            #     l = tf.layers.dense(l, hidden_size, activation=tf.nn.relu, name='fc-%d'%lidx)
            value = tf.layers.dense(
                l, 1, name='fc-value',
                kernel_initializer=tf.truncated_normal_initializer(stddev=0.1))
            if not hasattr(self, '_weights_critic'):
                self._weights_critic = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=vs.name)

        with tf.variable_scope('actor') as vs:
            l = tf.stop_gradient(l)
            mu_steering = 0.5 * tf.layers.dense(
                l, 1, activation=tf.nn.tanh, name='fc-mu-steering',
                kernel_initializer=tf.truncated_normal_initializer(stddev=0.01))
            mu_accel = tf.layers.dense(
                l, 1, activation=tf.nn.tanh, name='fc-mu-accel',
                kernel_initializer=tf.truncated_normal_initializer(stddev=0.01))
            mus = tf.concat([mu_steering, mu_accel], axis=-1)
            # mus = tf.layers.dense(l, 2, activation=tf.nn.tanh, name='fc-mus')
            # sigmas = tf.layers.dense(l, 2, activation=tf.nn.softplus, name='fc-sigmas')
            # sigmas = tf.clip_by_value(sigmas, -0.001, 0.5)
            sigma_steering_ = 0.5 * tf.layers.dense(
                l, 1, activation=tf.nn.sigmoid, name='fc-sigma-steering',
                kernel_initializer=tf.truncated_normal_initializer(stddev=0.01))
            sigma_accel_ = 1. * tf.layers.dense(
                l, 1, activation=tf.nn.sigmoid, name='fc-sigma-accel',
                kernel_initializer=tf.truncated_normal_initializer(stddev=0.01))
            # sigma_beta_steering = symbolic_functions.get_scalar_var('sigma_beta_steering', 0.3, summary=True, trainable=False)
            # sigma_beta_accel = symbolic_functions.get_scalar_var('sigma_beta_accel', 0.3, summary=True, trainable=False)
            from tensorpack.tfutils.common import get_global_step_var
            sigma_beta_steering_exp = tf.train.exponential_decay(0.001, get_global_step_var(), 1000, 0.5, name='sigma/beta/steering/exp')
            sigma_beta_accel_exp = tf.train.exponential_decay(0.5, get_global_step_var(), 5000, 0.5, name='sigma/beta/accel/exp')
            # sigma_steering = tf.minimum(sigma_steering_ + sigma_beta_steering, 0.5)
            # sigma_accel = tf.minimum(sigma_accel_ + sigma_beta_accel, 0.2)
            # sigma_steering = sigma_steering_
            sigma_steering = (sigma_steering_ + sigma_beta_steering_exp)
            sigma_accel = (sigma_accel_ + sigma_beta_accel_exp) #* 0.1
            # sigma_steering = sigma_steering_
            # sigma_accel = sigma_accel_
            sigmas = tf.concat([sigma_steering, sigma_accel], axis=-1)
            #     sigma_steering = tf.clip_by_value(sigma_steering, 0.1, 0.5)

            #     sigma_accel = tf.clip_by_value(sigma_accel, 0.1, 0.5)

            # sigmas = sigmas_orig + 0.001
            # sigmas = tf.clip_by_value(sigmas, 0.1, 0.5)
            # sigma_beta = tf.get_variable('sigma_beta', shape=[], dtype=tf.float32,
            #                              initializer=tf.constant_initializer(.5), trainable=False)

            # if is_training:
            #     pass
            #     # without sigma_beta, convergence is slow and unstable; likely because:
            #     #   1. plenty of exploration early in training keeps the network out of local optima
            #     #   2. a too-small sigma early on makes normal_dist's log_prob very large, so the
            #     #      gradient updates blow up and the network is crippled from the start, hard to recover
            #
            # if is_training:
            #     sigmas += sigma_beta_steering
            # sigma_steering = tf.clip_by_value(sigma_steering, sigma_beta_steering, 0.5)
            # sigma_accel = tf.clip_by_value(sigma_accel, sigma_beta_accel, 0.5)
            # sigmas = tf.clip_by_value(sigmas, 0.1, 0.5)
            # sigmas_orig = sigmas
            # sigmas = sigmas + sigma_beta_steering
            # sigmas = tf.minimum(sigmas + 0.1, 100)
            # sigmas = tf.clip_by_value(sigmas, sigma_beta_steering, 1)
            # sigma_steering += sigma_beta_steering
            # sigma_accel += sigma_beta_accel

            # mus = tf.concat([mu_steering, mu_accel], axis=-1)

            from tensorflow.contrib.distributions import Normal
            dists = Normal(mus, sigmas+1e-3)
            actions = tf.squeeze(dists.sample([1]), [0])
            # clip to within one standard deviation
            # actions = tf.clip_by_value(actions, -1., 1.)
            if is_training:
                summary.add_moving_summary(tf.reduce_mean(mu_steering, name='mu/steering/mean'),
                                           tf.reduce_mean(mu_accel, name='mu/accel/mean'),
                                           tf.reduce_mean(sigma_steering, name='sigma/steering/mean'),
                                           tf.reduce_max(sigma_steering, name='sigma/steering/max'),
                                           tf.reduce_mean(sigma_accel, name='sigma/accel/mean'),
                                           tf.reduce_max(sigma_accel, name='sigma/accel/max'),
                                           sigma_beta_accel_exp,
                                           sigma_beta_steering_exp,
                                           )
            # actions = tf.Print(actions, [mus, sigmas, tf.concat([sigma_steering_, sigma_accel_], -1), actions],
            #                    'mu/sigma/sigma.orig/act=', summarize=4)
            if not hasattr(self, '_weights_actor'):
                self._weights_actor = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=vs.name)

        return actions, value, dists
Example #14
File: main.py Project: waxz/ppo_torcs
    def _build_graph(self, inputs):
        from tensorpack.tfutils.common import get_global_step_var
        state, action, futurereward, advantage = inputs
        is_training = get_current_tower_context().is_training
        policy, value, dists = self._get_NN_prediction(state)
        if not hasattr(self, '_weights_train'):
            self._weights_train = self._weights_critic + self._weights_actor
        self.value = tf.squeeze(value, [1], name='value')  # (B,)
        self.policy = tf.identity(policy, name='policy')

        with tf.variable_scope("Pred") as vs:
            __p, __v, _ = self._get_NN_prediction(state)
            __v = tf.squeeze(__v, [1], name='value')  # (B,)
            __p = tf.identity(__p, name='policy')
            if not hasattr(self, '_weights_pred'):
                self._weights_pred = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=vs.name)
                assert (len(self._weights_train) == len(self._weights_pred))
                assert (not hasattr(self, '_sync_op'))
                self._sync_op = tf.group(*[d.assign(s + tf.truncated_normal(tf.shape(s), stddev=0.02)) for d, s in zip(self._weights_pred, self._weights_train)])

        with tf.variable_scope('pre') as vs:
            pre_p, pre_v, pre_dists = self._get_NN_prediction(state)
            if not hasattr(self, 'pre_weights'):
                self.pre_weights = tf.get_collection(
                    tf.GraphKeys.TRAINABLE_VARIABLES, scope=vs.name)
                self._td_sync_op = tf.group(
                    *[d.assign(s) for d, s in zip(self.pre_weights, self._weights_train)])


        if not is_training:
            return

        # advantage = tf.subtract(tf.stop_gradient(self.value), futurereward, name='advantage')
        # advantage = tf.Print(advantage, [self.value, futurereward, action, advantage], 'value/reward/act/advantage=', summarize=4)
        log_probs = dists.log_prob(action)
        # PPO clipped policy loss: ratio, surr1, surr2
        pre_probs = pre_dists.log_prob(action)
        ratio = tf.exp(log_probs - pre_probs)
        prob_ratio = tf.reduce_mean(input_tensor=tf.concat(values=ratio, axis=1), axis=1)
        clip_param = tf.train.exponential_decay(
            CLIP_PARAMETER, get_global_step_var(), 10000, 0.98, name='clip_param')


        # surr1 = prob_ratio * advantage
        surr1 = ratio * tf.expand_dims(advantage, -1)
        surr2 = tf.clip_by_value(ratio, 1.0 - clip_param,
                                 1.0 + clip_param) * tf.expand_dims(advantage, -1)

        # surr2 = tf.clip_by_value(prob_ratio, 1.0 - clip_param, 1.0 + clip_param) * advantage

        loss_policy = -tf.reduce_mean(tf.minimum(surr1, surr2))

        # critic clip loss
        v_loss1 = tf.square(value - futurereward)
        pre_value = pre_v + tf.clip_by_value(value - pre_v, -clip_param, clip_param)
        v_loss2 = tf.square(pre_v - futurereward)
        # loss_value = 0.5 * tf.reduce_mean(tf.maximum(v_loss1, v_loss2))
        loss_value = 0.5 * tf.reduce_mean(v_loss1)

        entropy = dists.entropy()
        entropy_beta = tf.get_variable('entropy_beta', shape=[],
                                       initializer=tf.constant_initializer(0.01), trainable=False)
        exp_v = entropy_beta * entropy
        loss_entropy = tf.reduce_mean(-tf.reduce_sum(exp_v, axis=-1), name='loss/entropy')
        loss_policy = loss_policy + loss_entropy

        # exp_v = tf.transpose(
        #     tf.multiply(tf.transpose(log_probs), advantage))
        # exp_v = tf.multiply(log_probs, advantage)
        # exp_v = log_probs * tf.expand_dims(advantage, -1)
        # entropy = dists.entropy()
        # entropy_beta = tf.get_variable('entropy_beta', shape=[],
        #                                initializer=tf.constant_initializer(0.01), trainable=False)
        # exp_v = entropy_beta * entropy + exp_v
        
        # loss_value = tf.reduce_mean(0.5 * tf.square(self.value - futurereward))

        # loss_entropy = tf.reduce_mean(tf.reduce_sum(entropy, axis=-1), name='xentropy_loss')


        from tensorflow.contrib.layers.python.layers.regularizers import apply_regularization, l2_regularizer
        loss_l2_regularizer = apply_regularization(l2_regularizer(1e-4), self._weights_critic)
        loss_l2_regularizer = tf.identity(loss_l2_regularizer, 'loss/l2reg')
        loss_value += loss_l2_regularizer
        loss_value = tf.identity(loss_value, name='loss/value')

        # self.cost = tf.add_n([loss_policy, loss_value * 0.1, loss_l2_regularizer])
        self._cost = [loss_policy, loss_value]
        from autodrive.trainer.summary import addParamSummary
        addParamSummary([('.*', ['rms', 'absmax'])])
        pred_reward = tf.reduce_mean(self.value, name='predict_reward')
        advantage = symbf.rms(advantage, name='rms_advantage')
        summary.add_moving_summary(loss_policy, loss_value,
                                   loss_entropy,
                                   pred_reward, advantage,
                                   loss_l2_regularizer,
                                   tf.reduce_mean(self.policy[:, 0], name='action/steering/mean'),
                                   tf.reduce_mean(self.policy[:, 1], name='action/accel/mean'),
                                    )
Example #15
    def build_graph(self, image1, label1, image2, _):
        image1 = self.image_preprocess(image1)
        image2 = self.image_preprocess(image2)
        is_training = get_current_tower_context().is_training

        # Shuffle unlabeled data within batch
        if is_training:
            image2 = tf.random_shuffle(image2)
        
        assert self.data_format in ['NCHW', 'NHWC']
        if self.data_format == 'NCHW':
            image1 = tf.transpose(image1, [0, 3, 1, 2])
            image2 = tf.transpose(image2, [0, 3, 1, 2])

        # Pseudo Label
        logits2, _ = self.get_logits(image2)
        label2 = tf.nn.softmax(logits2)
        
        # Fraction of training completed; adjust if the schedule changes:
        # 60 epochs, 1,280,000 images per epoch, batch size 256
        k = tf.cast(get_global_step_var(), tf.float32) /  (60 * 1280000 / 256)
        
        # Sample lambda
        dist_beta = tf.distributions.Beta(1.0, 1.0)
        lmb = dist_beta.sample(tf.shape(image1)[0])
        lmb_x = tf.reshape(lmb, [-1, 1, 1, 1])
        lmb_y = tf.reshape(lmb, [-1, 1])
        
        # Interpolation        
        label_ori = label1
        if is_training:
            image = tf.to_float(image1) * lmb_x + tf.to_float(image2) * (1. - lmb_x)
            label = tf.stop_gradient(tf.to_float(tf.one_hot(label1, 1000)) * lmb_y + tf.to_float(label2) * (1. - lmb_y))
        else:
            image = image1
            label = tf.to_float(tf.one_hot(label1, 1000))

        # Calculate feats and logits for interpolated samples 
        with tf.variable_scope(tf.get_variable_scope(), reuse=True):
            logits, features = self.get_logits(image)
        
        # Classification Loss and error 
        loss = ImageNetModel.compute_loss_and_error(
            logits, label, label_smoothing=self.label_smoothing, lmb=lmb, label_ori=label_ori)

        # Distribution Alignment 
        lp = 2. / (1. + tf.exp(-10. * k)) - 1
        net_ = flip_gradient(features, lp)
        fc1 = FullyConnected('linear_1', net_, 1024, nl=tf.nn.relu)
        fc2 = FullyConnected('linear_2', fc1, 1024, nl=tf.nn.relu)
        domain_logits = FullyConnected("logits_dm", fc2, 2)
        label_dm = tf.concat([tf.reshape(lmb, [-1, 1]), tf.reshape(1. - lmb, [-1, 1])], axis=1)
        da_cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=label_dm, logits=domain_logits))
        
        # Final Loss
        loss += da_cost


        if self.weight_decay > 0:
            wd_loss = regularize_cost(self.weight_decay_pattern,
                                      tf.contrib.layers.l2_regularizer(self.weight_decay),
                                      name='l2_regularize_loss')
            add_moving_summary(loss, wd_loss)
            total_cost = tf.add_n([loss, wd_loss], name='cost')
        else:
            total_cost = tf.identity(loss, name='cost')
            add_moving_summary(total_cost)

        if self.loss_scale != 1.:
            logger.info("Scaling the total loss by {} ...".format(self.loss_scale))
            return total_cost * self.loss_scale
        else:
            return total_cost
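
flip_gradient (the gradient-reversal layer feeding the domain classifier above) is imported elsewhere; one common, assumed-equivalent trick implements it with stop_gradient:

    import tensorflow as tf

    def flip_gradient(x, scale=1.0):
        # identity in the forward pass; gradient multiplied by -scale in the backward pass
        forward = tf.stop_gradient(x * (1.0 + scale))
        return forward - x * scale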
Example #16
 def _setup_graph(self):
     global_step = get_global_step_var()
     self.assign_op = global_step.assign(self.global_step_val)
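
A sketch of how such a callback presumably runs the op, assuming global_step_val was stored earlier and using tensorpack's _before_train hook:

    def _before_train(self):
        # restore the saved global step before training resumes
        self.trainer.sess.run(self.assign_op)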