Example #1
def get_config():
    M = Model()

    dataflow = data_io
    from tensorpack.callbacks.base import Callback

    class CBSyncWeight(Callback):
        def _before_run(self, ctx):
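            # fetch M._sync_op_pred together with every 10th training step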
            if self.local_step % 10 == 0:
                return [M._sync_op_pred]

    import functools
    from tensorpack.train.config import TrainConfig
    from tensorpack.callbacks.saver import ModelSaver
    from tensorpack.callbacks.graph import RunOp
    from tensorpack.callbacks.param import ScheduledHyperParamSetter, HumanHyperParamSetter, HyperParamSetterWithFunc
    from tensorpack.tfutils import sesscreate
    from tensorpack.tfutils.common import get_default_sess_config
    import tensorpack.tfutils.symbolic_functions as symbf

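    # non-trainable scalar hyperparameters; the ScheduledHyperParamSetter
    # callbacks below anneal them by variable name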
    sigma_beta_steering = symbf.get_scalar_var('actor/sigma_beta_steering',
                                               0.3,
                                               summary=True,
                                               trainable=False)
    sigma_beta_accel = symbf.get_scalar_var('actor/sigma_beta_accel',
                                            0.3,
                                            summary=True,
                                            trainable=False)

    return TrainConfig(
        model=M,
        data=dataflow,
        callbacks=[
            ModelSaver(),
            HyperParamSetterWithFunc(
                'learning_rate/actor',
                functools.partial(M._calc_learning_rate, 'actor')),
            HyperParamSetterWithFunc(
                'learning_rate/critic',
                functools.partial(M._calc_learning_rate, 'critic')),

            # ScheduledHyperParamSetter('learning_rate', [(20, 0.0003), (120, 0.0001)]),
            ScheduledHyperParamSetter('entropy_beta', [(80, 0.005)]),
            # HumanHyperParamSetter('learning_rate'),
            # HumanHyperParamSetter('entropy_beta'),
            ScheduledHyperParamSetter('actor/sigma_beta_accel', [(1, 0.2),
                                                                 (2, 0.01)]),
            ScheduledHyperParamSetter('actor/sigma_beta_steering',
                                      [(1, 0.1), (2, 0.01)]),
            CBSyncWeight(),
            data_io,
            # PeriodicTrigger(Evaluator(
            #     EVAL_EPISODE, ['state'], ['policy'], get_player),
            #     every_k_epochs=3),
        ] + evaluators,
        session_creator=sesscreate.NewSessionCreator(
            config=get_default_sess_config(0.5)),
        steps_per_epoch=STEPS_PER_EPOCH,
        max_epoch=1000,
    )
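
HyperParamSetterWithFunc calls its function as func(epoch_num, old_value), so the functools.partial calls above imply that Model._calc_learning_rate takes the component name as its first argument. A minimal sketch of such a method (hypothetical; the real one is defined on Model elsewhere):

def _calc_learning_rate(self, name, epoch_num, old_value):
    # hypothetical schedule: drop the 'actor'/'critic' rate 10x every 50 epochs
    if epoch_num > 0 and epoch_num % 50 == 0:
        return old_value * 0.1
    return old_value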
Example #2
def get_config(model, algorithm_name):
    logger.auto_set_dir()

    dataset = model.get_data()
    steps_per_epoch = dataset.size()

    lr = symbf.get_scalar_var('learning_rate', 1e-4, summary=True)

    extra_display = ["cost"]
    if algorithm_name != "cosine":
        extra_display += ["loss/pos-dist", "loss/neg-dist"]

    return TrainConfig(
        dataflow=dataset,
        model=model(),
        optimizer=tf.train.GradientDescentOptimizer(lr),
        callbacks=[
            ModelSaver(),
            ScheduledHyperParamSetter('learning_rate', [(10, 1e-5),
                                                        (20, 1e-6)])
        ],
        extra_callbacks=[
            MovingAverageSummary(),
            ProgressBar(extra_display),
            StatPrinter()
        ],
        steps_per_epoch=steps_per_epoch,
        max_epoch=20,
    )
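
Passing extra_callbacks replaces tensorpack's default extra callbacks, which is why MovingAverageSummary, ProgressBar and StatPrinter are re-listed explicitly. A hedged sketch of driving this config under the same-era trainer API (EmbeddingModel and 'triplet' are assumptions, not names from this file):

# hypothetical driver
config = get_config(EmbeddingModel, 'triplet')
SimpleTrainer(config).train()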
Example #3
def _get_optimizer(self, name):
    from tensorpack.tfutils import optimizer
    from tensorpack.tfutils.gradproc import SummaryGradient, GlobalNormClip, MapGradient
    init_lr = INIT_LEARNING_RATE_A if name == 'actor' else INIT_LEARNING_RATE_C
    import tensorpack.tfutils.symbolic_functions as symbf
    lr = symbf.get_scalar_var('learning_rate/' + name,
                              init_lr,
                              summary=True)
    opt = tf.train.AdamOptimizer(lr)
    logger.info("create opt {}".format(name))
    # clip per-scope gradients: the critic more tightly than the actor
    if name == 'critic':
        gradprocs = [
            MapGradient(lambda grad: tf.clip_by_average_norm(grad, 0.05),
                        regex='^critic/.*')
        ]
    elif name == 'actor':
        gradprocs = [
            MapGradient(lambda grad: tf.clip_by_average_norm(grad, 0.1),
                        regex='^actor/.*')
        ]
    else:
        assert False, "unknown optimizer name: {}".format(name)
    gradprocs.append(SummaryGradient())
    opt = optimizer.apply_grad_processors(opt, gradprocs)
    return opt
Example #4
def get_config():
    logger.auto_set_dir()
    M = Model()

    master = MySimulatorMaster(namec2s, names2c, M)
    dataflow = BatchData(DataFromQueue(master.queue), BATCH_SIZE)

    lr = symbf.get_scalar_var('learning_rate', 0.0001, summary=True)
    return TrainConfig(
        dataset=dataflow,
        optimizer=tf.train.AdamOptimizer(lr, epsilon=1e-3),
        callbacks=Callbacks([
            StatPrinter(),
            ModelSaver(),
            HumanHyperParamSetter('learning_rate', 'hyper.txt'),
            HumanHyperParamSetter('entropy_beta', 'hyper.txt'),
            HumanHyperParamSetter('explore_factor', 'hyper.txt'),
            master,
            StartProcOrThread(master),
            #      PeriodicCallback(Evaluator(EVAL_EPISODE, ['state'], ['logits']), 1),
            GlobalStepSetter(),
        ]),
        session_config=get_default_sess_config(0.5),
        model=M,
        step_per_epoch=STEP_PER_EPOCH,
        max_epoch=1000,
    )
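
Note that this snippet uses an older tensorpack API surface (dataset=, step_per_epoch=, session_config= and a Callbacks([...]) wrapper); later releases spell these dataflow=, steps_per_epoch= and session_creator=, and take a plain callbacks list, as in Example #1.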
Example #5
def get_config(cifar_classnum):
    logger.auto_set_dir()

    # prepare dataset
    dataset_train = get_data('train', cifar_classnum)
    step_per_epoch = dataset_train.size()
    dataset_test = get_data('test', cifar_classnum)

    sess_config = get_default_sess_config(0.5)

    lr = symbf.get_scalar_var('learning_rate', 1e-2)
    def lr_func(lr):
        if lr < 3e-5:
            raise StopTraining()
        return lr * 0.31

    return TrainConfig(
        dataset=dataset_train,
        optimizer=tf.train.AdamOptimizer(lr, epsilon=1e-3),
        callbacks=Callbacks([
            StatPrinter(), ModelSaver(),
            InferenceRunner(dataset_test, ClassificationError()),
            StatMonitorParamSetter('learning_rate', 'val_error', lr_func,
                threshold=0.001, last_k=10),
        ]),
        session_config=sess_config,
        model=Model(cifar_classnum),
        step_per_epoch=step_per_epoch,
        max_epoch=150,
    )
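
Per tensorpack's StatMonitorParamSetter semantics, lr_func is called on the current learning rate whenever 'val_error' has failed to improve by more than 0.001 over the last 10 epochs. Starting from 1e-2, each trigger multiplies the rate by 0.31 (1e-2 -> 3.1e-3 -> ... -> 2.9e-5), so the sixth trigger finds lr below 3e-5 and raises StopTraining to end the run.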
Example #6
def get_config():
    logger.auto_set_dir()
    M = Model()

    name_base = str(uuid.uuid1())[:6]
    PIPE_DIR = os.environ.get('TENSORPACK_PIPEDIR', '.').rstrip('/')
    namec2s = 'ipc://{}/sim-c2s-{}'.format(PIPE_DIR, name_base)
    names2c = 'ipc://{}/sim-s2c-{}'.format(PIPE_DIR, name_base)
    procs = [MySimulatorWorker(k, namec2s, names2c) for k in range(SIMULATOR_PROC)]
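    # register the workers for cleanup on exit, then start them with signals masked in the children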
    ensure_proc_terminate(procs)
    start_proc_mask_signal(procs)

    master = MySimulatorMaster(namec2s, names2c, M)
    dataflow = BatchData(DataFromQueue(master.queue), BATCH_SIZE)

    lr = symbf.get_scalar_var('learning_rate', 0.001, summary=True)
    return TrainConfig(
        dataset=dataflow,
        optimizer=tf.train.AdamOptimizer(lr, epsilon=1e-3),
        callbacks=Callbacks([
            StatPrinter(), ModelSaver(),
            ScheduledHyperParamSetter('learning_rate', [(80, 0.0003), (120, 0.0001)]),
            ScheduledHyperParamSetter('entropy_beta', [(80, 0.005)]),
            ScheduledHyperParamSetter('explore_factor',
                [(80, 2), (100, 3), (120, 4), (140, 5)]),
            master,
            StartProcOrThread(master),
            PeriodicCallback(Evaluator(EVAL_EPISODE, ['state'], ['logits']), 2),
        ]),
        session_config=get_default_sess_config(0.5),
        model=M,
        step_per_epoch=STEP_PER_EPOCH,
        max_epoch=1000,
    )
Example #7
def _get_optimizer(self):
    lr = symbf.get_scalar_var('learning_rate', 5e-4, summary=True)
    opt = tf.train.AdamOptimizer(lr, epsilon=1e-3)
    # scale gradients of the 'STN' (spatial transformer) variables down 10x
    return optimizer.apply_grad_processors(opt, [
        gradproc.ScaleGradient(('STN.*', 0.1)),
        gradproc.SummaryGradient()
    ])
Example #8
    def _get_optimizer(self):
        lr = symbf.get_scalar_var('learning_rate', 0.001, summary=True)
        opt = tf.train.AdamOptimizer(lr, epsilon=1e-3)

        gradprocs = [MapGradient(lambda grad: tf.clip_by_average_norm(grad, 0.1)),
                     SummaryGradient()]
        opt = optimizer.apply_grad_processors(opt, gradprocs)
        return opt
Example #9
    def _get_optimizer(self):
        lr = symbf.get_scalar_var('learning_rate', 0.001, summary=True)
        opt = tf.train.AdamOptimizer(lr, epsilon=1e-3)

        gradprocs = [MapGradient(lambda grad: tf.clip_by_average_norm(grad, 0.1)),
                     SummaryGradient()]
        opt = optimizer.apply_grad_processors(opt, gradprocs)
        return opt
Example #10
    def _get_optimizer(self):
        lr = symbf.get_scalar_var('learning_rate', 0.003, summary=True)

        factor = get_batch_factor()
        if factor != 1:
            lr = lr / float(factor)
            opt = tf.train.MomentumOptimizer(lr, 0.9)
            opt = optimizer.AccumGradOptimizer(opt, factor)
        else:
            opt = tf.train.MomentumOptimizer(lr, 0.9)
        return opt
        # unreachable alternative that would also double the bias gradients:
        # return optimizer.apply_grad_processors(
        #     opt, [gradproc.ScaleGradient(('.*/b', 2))])
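
When get_batch_factor() returns factor > 1, AccumGradOptimizer accumulates gradients and applies them only every factor iterations; dividing the learning rate by the same factor keeps the effective step size roughly comparable to a single large-batch update.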
Example #11
def get_config():
    logger.auto_set_dir()
    dataset = get_data()
    lr = symbf.get_scalar_var('learning_rate', 2e-4, summary=True)
    return TrainConfig(
        dataflow=dataset,
        optimizer=tf.train.AdamOptimizer(lr, beta1=0.5, epsilon=1e-6),
        callbacks=[ModelSaver()],
        session_config=get_default_sess_config(0.5),
        model=Model(),
        steps_per_epoch=500,
        max_epoch=100,
    )
Example #12
def _get_opt(name, init_lr):
    lr = symbf.get_scalar_var('learning_rate/' + name, init_lr, summary=True)
    opt = tf.train.AdamOptimizer(lr)
    logger.info("create opt {}".format(name))
    # clip the critic's gradients more tightly than the actor's
    gradprocs = [
        # MapGradient(lambda grad: tf.Print(grad, [grad], 'grad {}='.format(grad.op.name), summarize=4)),
        MapGradient(lambda grad: tf.clip_by_average_norm(grad, 0.1), regex='^actor/.*'),
        MapGradient(lambda grad: tf.clip_by_average_norm(grad, 0.05), regex='^critic/.*'),
        # GlobalNormClip(40.),
        SummaryGradient(),
    ]
    opt = optimizer.apply_grad_processors(opt, gradprocs)
    return opt
Example #13
def _get_opt(name, init_lr):
    lr = symbf.get_scalar_var('learning_rate/' + name,
                              init_lr,
                              summary=True)
    opt = tf.train.AdamOptimizer(lr)
    logger.info("create opt {}".format(name))
    gradprocs = [
        # MapGradient(lambda grad: tf.Print(grad, [grad], 'grad {}='.format(grad.op.name), summarize=4)),
        MapGradient(lambda grad: tf.clip_by_average_norm(grad, 0.1),
                    regex='^actor/.*'),
        MapGradient(lambda grad: tf.clip_by_average_norm(grad, 0.05),
                    regex='^critic/.*'),
        # GlobalNormClip(40.),
        SummaryGradient(),
    ]
    opt = optimizer.apply_grad_processors(opt, gradprocs)
    return opt
Example #14
def get_config(model):
    logger.auto_set_dir()

    dataset = model.get_data()
    steps_per_epoch = dataset.size()

    lr = symbf.get_scalar_var('learning_rate', 1e-4, summary=True)

    return TrainConfig(
        dataflow=dataset,
        model=model(),
        optimizer=tf.train.GradientDescentOptimizer(lr),
        callbacks=[
            ModelSaver(),
            ScheduledHyperParamSetter('learning_rate', [(10, 1e-5),
                                                        (20, 1e-6)])
        ],
        steps_per_epoch=steps_per_epoch,
        max_epoch=20,
    )
Example #15
def get_config():
    logger.auto_set_dir()

    dataset_train, dataset_test = get_data(True), get_data(False)
    step_per_epoch = dataset_train.size() * 5

    lr = symbf.get_scalar_var('learning_rate', 5e-4, summary=True)

    return TrainConfig(
        dataset=dataset_train,
        optimizer=tf.train.AdamOptimizer(lr, epsilon=1e-3),
        callbacks=Callbacks([
            StatPrinter(), ModelSaver(),
            InferenceRunner(dataset_test,
                [ScalarStats('cost'), ClassificationError() ]),
            ScheduledHyperParamSetter('learning_rate', [(200, 1e-4)])
        ]),
        session_config=get_default_sess_config(0.5),
        model=Model(),
        step_per_epoch=step_per_epoch,
        max_epoch=500,
    )
Example #16
def get_config():
    logger.auto_set_dir()

    M = Model()
    dataset_train = ExpReplay(
        predictor_io_names=(['state'], ['Qvalue']),
        player=get_player(train=True),
        batch_size=BATCH_SIZE,
        memory_size=MEMORY_SIZE,
        init_memory_size=INIT_MEMORY_SIZE,
        exploration=INIT_EXPLORATION,
        end_exploration=END_EXPLORATION,
        exploration_epoch_anneal=EXPLORATION_EPOCH_ANNEAL,
        update_frequency=4,
        reward_clip=(-1, 1),
        history_len=FRAME_HISTORY)

    lr = symbf.get_scalar_var('learning_rate', 1e-3, summary=True)

    return TrainConfig(
        dataflow=dataset_train,
        optimizer=tf.train.AdamOptimizer(lr, epsilon=1e-3),
        callbacks=Callbacks([
            StatPrinter(),
            ModelSaver(),
            ScheduledHyperParamSetter('learning_rate',
                                      [(150, 4e-4), (250, 1e-4), (350, 5e-5)]),
            RunOp(lambda: M.update_target_param()),
            dataset_train,
            PeriodicCallback(Evaluator(EVAL_EPISODE, ['state'], ['Qvalue']),
                             3),
            # HumanHyperParamSetter('learning_rate', 'hyper.txt'),
            # HumanHyperParamSetter(ObjAttrParam(dataset_train, 'exploration'), 'hyper.txt'),
        ]),
        # save memory for multiprocess evaluator
        session_config=get_default_sess_config(0.6),
        model=M,
        step_per_epoch=STEP_PER_EPOCH,
    )
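
RunOp(lambda: M.update_target_param()) wires the DQN target-network sync into the loop; by default RunOp runs the op as a per-epoch trigger, alongside the exploration annealing handled inside ExpReplay (which is itself registered as a callback above).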
Example #17
def _get_optimizer(self):
    lr = symbf.get_scalar_var('learning_rate', 1e-4, summary=True)
    return tf.train.GradientDescentOptimizer(lr)
Example #18
def _get_optimizer(self):
    lr = symbf.get_scalar_var('learning_rate', 1e-2, summary=True)
    return tf.train.AdamOptimizer(lr, epsilon=1e-3)
Example #19
def _get_optimizer(self):
    lr = symbf.get_scalar_var('learning_rate', 0.003, summary=True)
    opt = tf.train.MomentumOptimizer(lr, 0.9)
    return optimizer.apply_grad_processors(
        opt, [gradproc.ScaleGradient(('.*/b', 2))])
Example #20
def _get_optimizer(self):
    lr = symbf.get_scalar_var('learning_rate', 2e-4, summary=True)
    opt = tf.train.AdamOptimizer(lr, beta1=0.5, epsilon=1e-6)
    # the generator learns 5 times faster
    return optimizer.apply_grad_processors(
        opt, [gradproc.ScaleGradient(('gen/.*', 5), log=True)])
Example #21
def _get_optimizer(self):
    lr = symbolic_functions.get_scalar_var('learning_rate',
                                           2e-3,
                                           summary=True)
    opt = tf.train.AdamOptimizer(lr)
    return optimizer.apply_grad_processors(opt, [GlobalNormClip(5)])
Example #22
    def _setup(self):
        from tensorpack.tfutils import symbolic_functions
        self._v_epoch_num = symbolic_functions.get_scalar_var('epoch_num',
                                                              0,
                                                              summary=True)

        import multiprocessing as mp
        self._epoch_shared = mp.Value('i', 0)

        super(SyncMultiGPUTrainerParameterServer, self)._setup()

        raw_devices = ['/device:GPU:{}'.format(k) for k in self.config.tower]
        # raw_devices = ['/gpu:{}'.format(k) for k in self.config.tower]
        if self._ps_device == 'gpu':
            devices = [
                LeastLoadedDeviceSetter(d, raw_devices) for d in raw_devices
            ]
        else:
            devices = [
                tf.train.replica_device_setter(worker_device=d,
                                               ps_device='/cpu:0',
                                               ps_tasks=1) for d in raw_devices
            ]

        from ..model.base import ModelBase
        model = self.model  # type: ModelBase
        assert isinstance(model, ModelBase)

        logger.info("Building graph ...")
        model.build_graph(None)

        from tensorpack.callbacks.summary import MergeAllSummaries_RunWithOp, MovingAverageSummary
        train_ops_main = []
        train_ops_aux = {}
        for lname, loss in model._losses.items():
            logger.info("Building opt for {} loss {} ...".format(
                'main' if loss._isMainLoss else 'aux ', lname))
            opt = model.get_optimizer() if loss._opt is None else loss._opt
            grads_array = []
            for l in loss._losses:
                grads = opt.compute_gradients(
                    l,
                    gate_gradients=tf.train.Optimizer.GATE_NONE,
                    colocate_gradients_with_ops=True)
                grads = [(g, v) for g, v in grads if g is not None]
                grads_array.append(grads)
            grads = self._average_grads(grads_array)
            train_op = opt.apply_gradients(grads)
            summary_callbacks = []
            if isinstance(loss._summary_collection, str):
                c_vars = tf.get_collection(loss._summary_collection +
                                           '-ema_op')
                if len(c_vars) > 0:
                    summary_callbacks.append(
                        MovingAverageSummary(loss._summary_collection +
                                             '-ema_op'))
                summary_callbacks.append(
                    MergeAllSummaries_RunWithOp(0, loss._summary_collection))
            if loss._isMainLoss:
                train_ops_main.append(train_op)
                for c in summary_callbacks:
                    self.register_callback(c)
                if loss._tensor_io:
                    loss._tensor_io._is_main = True
                    self.register_callback(loss._tensor_io)
            elif loss._trainOpGroup is not None:
                if loss._trainOpGroup not in train_ops_aux:
                    train_ops_aux[loss._trainOpGroup] = _AuxTrainOp(
                        loss._trainOpGroup)
                auxTrainOp = train_ops_aux[loss._trainOpGroup]
                auxTrainOp._train_ops.append(train_op)
                auxTrainOp._callbacks += summary_callbacks
                if loss._tensor_io:
                    auxTrainOp._callbacks.append(loss._tensor_io)
            else:
                auxTrainOp = _AuxTrainOp(lname)
                auxTrainOp._train_ops = [train_op]
                auxTrainOp._callbacks += summary_callbacks
                if loss._tensor_io:
                    auxTrainOp._callbacks.append(loss._tensor_io)
                train_ops_aux[lname] = auxTrainOp

        for n, auxTrainOp in train_ops_aux.items():
            assert len(auxTrainOp._train_ops) > 0
            auxTrainOp._train_op = tf.group(*auxTrainOp._train_ops,
                                            name=n + '/train_op')
            for c in auxTrainOp._callbacks:
                c.setup_graph(self)
        # for rname, rop in model._run_ops.items():
        #     train_ops_aux.append(tf.group(*rop._run_ops, name=rname + '/run-op'))

        var_lists = tf.get_collection_ref(tf.GraphKeys.TRAINABLE_VARIABLES)
        var_lists[:] = [
            v for v in var_lists if not v.name.startswith('evaluate/')
        ]

        self.train_op = tf.group(*train_ops_main, name='train_op')
        self._train_ops_aux = train_ops_aux
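
In short, this _setup builds one optimizer step per registered loss: per-tower gradients are averaged and applied per loss, main losses are grouped into self.train_op, auxiliary losses are grouped by _trainOpGroup into named train ops carrying their own summary callbacks, and variables under 'evaluate/' are filtered out of TRAINABLE_VARIABLES so they are never updated.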
Example #23
def _get_optimizer(self):
    lr = symbf.get_scalar_var('learning_rate', 1e-3, summary=True)
    opt = tf.train.AdamOptimizer(lr, epsilon=1e-3)
    return optimizer.apply_grad_processors(
        opt, [gradproc.GlobalNormClip(10),
              gradproc.SummaryGradient()])
Example #24
def _get_optimizer(self):
    lr = symbf.get_scalar_var('learning_rate', 2e-4, summary=True)
    opt = tf.train.AdamOptimizer(lr, beta1=0.5, epsilon=1e-6)
    # the generator learns 5 times faster
    return optimizer.apply_grad_processors(
        opt, [gradproc.ScaleGradient(('gen/.*', 5), log=True)])
Example #25
def _get_optimizer(self):
    lr = symbf.get_scalar_var('learning_rate', 0.1, summary=True)
    return tf.train.MomentumOptimizer(lr, 0.9, use_nesterov=True)
Example #26
def _get_optimizer(self):
    lr = symbf.get_scalar_var('learning_rate', 3e-5, summary=True)
    opt = tf.train.AdamOptimizer(lr, epsilon=1e-3)
    return optimizer.apply_grad_processors(opt, [
        gradproc.ScaleGradient([('convfcweight.*', 0.1), ('conv5_.*', 5)])
    ])
Example #27
def _get_optimizer(self):
    lr = symbf.get_scalar_var('learning_rate', 2e-4, summary=True)
    return tf.train.AdamOptimizer(lr, beta1=0.5, epsilon=1e-3)
Example #28
    def _build_ad_nn(self, tensor_io):
        from drlutils.dataflow.tensor_io import TensorIO
        assert isinstance(tensor_io, TensorIO)
        from drlutils.model.base import get_current_nn_context
        from tensorpack.tfutils.common import get_global_step_var
        import tensorpack.tfutils.symbolic_functions as symbf
        global_step = get_global_step_var()
        nnc = get_current_nn_context()
        is_training = nnc.is_training
        i_state = tensor_io.getInputTensor('state')
        i_agentIdent = tensor_io.getInputTensor('agentIdent')
        i_sequenceLength = tensor_io.getInputTensor('sequenceLength')
        i_resetRNN = tensor_io.getInputTensor('resetRNN')
        l = i_state
        # l = tf.Print(l, [i_state, tf.shape(i_state)], 'State = ')
        # l = tf.Print(l, [i_agentIdent, tf.shape(i_agentIdent)], 'agentIdent = ')
        # l = tf.Print(l, [i_sequenceLength, tf.shape(i_sequenceLength)], 'SeqLen = ')
        # l = tf.Print(l, [i_resetRNN, tf.shape(i_resetRNN)], 'resetRNN = ')
        with tf.variable_scope('critic', reuse=nnc.reuse) as vs:

            def _get_cell():
                cell = tf.nn.rnn_cell.BasicLSTMCell(256)
                # if is_training:
                #     cell = tf.nn.rnn_cell.DropoutWrapper(cell, output_keep_prob=0.9)
                return cell

            cell = tf.nn.rnn_cell.MultiRNNCell([_get_cell() for _ in range(1)])
            rnn_outputs = self._buildRNN(
                l,
                cell,
                tensor_io.batchSize,
                i_agentIdent=i_agentIdent,
                i_sequenceLength=i_sequenceLength,
                i_resetRNN=i_resetRNN,
            )
            rnn_outputs = tf.reshape(
                rnn_outputs, [-1, rnn_outputs.get_shape().as_list()[-1]])
            l = rnn_outputs
            from ad_cur.autodrive.model.selu import fc_selu
            for lidx in range(2):
                l = fc_selu(
                    l,
                    200,
                    keep_prob=1.,  # training uses sensor input only; no key information may be dropped
                    is_training=is_training,
                    name='fc-{}'.format(lidx))
            value = tf.layers.dense(l, 1, name='fc-value')
            value = tf.squeeze(value, [1], name="value")
            if not hasattr(self, '_weights_critic'):
                self._weights_critic = tf.get_collection(
                    tf.GraphKeys.TRAINABLE_VARIABLES, scope=vs.name)

        with tf.variable_scope('actor', reuse=nnc.reuse) as vs:
            l = tf.stop_gradient(l)
            mu_steering = 1. * tf.layers.dense(
                l, 1, activation=tf.nn.tanh, name='fc-mu-steering')
            mu_accel = tf.layers.dense(l,
                                       1,
                                       activation=tf.nn.tanh,
                                       name='fc-mu-accel')
            mus = tf.concat([mu_steering, mu_accel], axis=-1)

            # mus = tf.layers.dense(l, 2, activation=tf.nn.tanh, name='fc-mus')
            # sigmas = tf.layers.dense(l, 2, activation=tf.nn.softplus, name='fc-sigmas')
            # sigmas = tf.clip_by_value(sigmas, -0.001, 0.5)
            def saturating_sigmoid(x):
                """Saturating sigmoid: 1.2 * sigmoid(x) - 0.1 cut to [0, 1]."""
                with tf.name_scope("saturating_sigmoid", [x]):
                    y = tf.sigmoid(x)
                    return tf.minimum(1.0, tf.maximum(0.0, 1.2 * y - 0.1))

            sigma_steering_ = 1. * tf.layers.dense(
                l, 1, activation=tf.nn.sigmoid, name='fc-sigma-steering')
            sigma_accel_ = 1. * tf.layers.dense(
                l, 1, activation=tf.nn.sigmoid, name='fc-sigma-accel')
            sigma_beta_steering = symbf.get_scalar_var('sigma_beta_steering',
                                                       0.3,
                                                       summary=True,
                                                       trainable=False)
            sigma_beta_accel = symbf.get_scalar_var('sigma_beta_accel',
                                                    0.3,
                                                    summary=True,
                                                    trainable=False)

            if nnc.is_evaluating:
                # during evaluation, reduce the exploration noise floor to near zero
                sigma_beta_steering = tf.constant(1e-4)
                sigma_beta_accel = tf.constant(1e-4)
            # during training, the scheduled sigma_beta variables above act as the floor
            # sigma_beta_steering_exp = tf.train.exponential_decay(0.3, global_step, 1000, 0.5, name='sigma/beta/steering/exp')
            # sigma_beta_accel_exp = tf.train.exponential_decay(0.5, global_step, 5000, 0.5, name='sigma/beta/accel/exp')
            sigma_steering = (sigma_steering_ + sigma_beta_steering)
            sigma_accel = (sigma_accel_ + sigma_beta_accel)  # * 0.1

            # sigma_steering = tf.minimum(sigma_steering_ + sigma_beta_steering, 0.5)
            # sigma_accel = tf.minimum(sigma_accel_ + sigma_beta_accel, 0.2)
            # sigma_steering = sigma_steering_
            # sigma_steering = sigma_steering_
            # sigma_accel = sigma_accel_
            sigmas = tf.clip_by_value(
                tf.concat([sigma_steering, sigma_accel], axis=-1), 0., 1.)
            #     sigma_steering = tf.clip_by_value(sigma_steering, 0.1, 0.5)

            #     sigma_accel = tf.clip_by_value(sigma_accel, 0.1, 0.5)

            # sigmas = sigmas_orig + 0.001
            # sigmas = tf.clip_by_value(sigmas, 0.1, 0.5)
            # sigma_beta = tf.get_variable('sigma_beta', shape=[], dtype=tf.float32,
            #                              initializer=tf.constant_initializer(.5), trainable=False)

            # if is_training:
            #     pass
            #     # Without sigma_beta, convergence is slow and unstable, presumably because:
            #     #   1. ample exploration early in training keeps the network out of local optima
            #     #   2. a too-small sigma early on makes normal_dist's log_prob very large, so
            #     #      gradient updates blow up and the network is deformed from the start,
            #     #      which is hard to recover from
            #
            # if is_training:
            #     sigmas += sigma_beta_steering
            # sigma_steering = tf.clip_by_value(sigma_steering, sigma_beta_steering, 0.5)
            # sigma_accel = tf.clip_by_value(sigma_accel, sigma_beta_accel, 0.5)
            # sigmas = tf.clip_by_value(sigmas, 0.1, 0.5)
            # sigmas_orig = sigmas
            # sigmas = sigmas + sigma_beta_steering
            # sigmas = tf.minimum(sigmas + 0.1, 100)
            # sigmas = tf.clip_by_value(sigmas, sigma_beta_steering, 1)
            # sigma_steering += sigma_beta_steering
            # sigma_accel += sigma_beta_accel

            # mus = tf.concat([mu_steering, mu_accel], axis=-1)

            from tensorflow.contrib.distributions import Normal
            dists = Normal(mus, sigmas)
            policy = tf.squeeze(dists.sample([1]), [0])
            # clip the sampled action to within two standard deviations of the mean
            policy = tf.clip_by_value(policy, mus - 2 * sigmas,
                                      mus + 2 * sigmas)
            if is_training:
                self._addMovingSummary(
                    tf.reduce_mean(mu_steering, name='mu/steering/mean'),
                    tf.reduce_mean(mu_accel, name='mu/accel/mean'),
                    tf.reduce_mean(sigma_steering, name='sigma/steering/mean'),
                    tf.reduce_max(sigma_steering, name='sigma/steering/max'),
                    tf.reduce_mean(sigma_accel, name='sigma/accel/mean'),
                    tf.reduce_max(sigma_accel, name='sigma/accel/max'),
                    sigma_beta_accel,
                    sigma_beta_steering,
                )
            # actions = tf.Print(actions, [mus, sigmas, tf.concat([sigma_steering_, sigma_accel_], -1), actions],
            #                    'mu/sigma/sigma.orig/act=', summarize=4)
            if not hasattr(self, '_weights_actor'):
                self._weights_actor = tf.get_collection(
                    tf.GraphKeys.TRAINABLE_VARIABLES, scope=vs.name)
        if not is_training:
            tensor_io.setOutputTensors(policy, value, mus, sigmas)
            return

        i_actions = tensor_io.getInputTensor("action")
        i_actions = tf.reshape(i_actions,
                               [-1] + i_actions.get_shape().as_list()[2:])
        log_probs = dists.log_prob(i_actions)
        # exp_v = tf.transpose(
        #     tf.multiply(tf.transpose(log_probs), advantage))
        # exp_v = tf.multiply(log_probs, advantage)
        i_advantage = tensor_io.getInputTensor("advantage")
        i_advantage = tf.reshape(i_advantage,
                                 [-1] + i_advantage.get_shape().as_list()[2:])
        exp_v = log_probs * tf.expand_dims(i_advantage, -1)
        entropy = dists.entropy()
        entropy_beta = tf.get_variable(
            'entropy_beta',
            shape=[],
            initializer=tf.constant_initializer(0.01),
            trainable=False)
        exp_v = entropy_beta * entropy + exp_v
        loss_policy = tf.reduce_mean(-tf.reduce_sum(exp_v, axis=-1),
                                     name='loss/policy')

        i_futurereward = tensor_io.getInputTensor("futurereward")
        i_futurereward = tf.reshape(i_futurereward, [-1] +
                                    i_futurereward.get_shape().as_list()[2:])
        loss_value = tf.reduce_mean(0.5 * tf.square(value - i_futurereward))

        loss_entropy = tf.reduce_mean(tf.reduce_sum(entropy, axis=-1),
                                      name='xentropy_loss')

        from tensorflow.contrib.layers.python.layers.regularizers import apply_regularization, l2_regularizer
        loss_l2_regularizer = apply_regularization(l2_regularizer(1e-4),
                                                   self._weights_critic)
        loss_l2_regularizer = tf.identity(loss_l2_regularizer, 'loss/l2reg')
        loss_value += loss_l2_regularizer
        loss_value = tf.identity(loss_value, name='loss/value')

        # self.cost = tf.add_n([loss_policy, loss_value * 0.1, loss_l2_regularizer])

        self._addParamSummary([('.*', ['rms', 'absmax'])])
        pred_reward = tf.reduce_mean(value, name='predict_reward')

        advantage = symbf.rms(i_advantage, name='rms_advantage')
        self._addMovingSummary(
            loss_policy,
            loss_value,
            loss_entropy,
            pred_reward,
            advantage,
            loss_l2_regularizer,
            tf.reduce_mean(policy[:, 0], name='actor/steering/mean'),
            tf.reduce_mean(policy[:, 1], name='actor/accel/mean'),
        )
        return loss_policy, loss_value
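
Read as an actor-critic (A3C-style) pair of losses, the code above computes, per sample:

    loss_policy = -mean( log_prob(action) * advantage + entropy_beta * entropy )
    loss_value  = mean( 0.5 * (value - futurereward)^2 ) + L2(critic weights)

with the actor stop-gradiented from the shared recurrent features, exploration noise floored by the scheduled sigma_beta variables during training, and sampled actions clipped to mu +/- 2*sigma.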
Example #29
def _get_optimizer(self):
    lr = symbolic_functions.get_scalar_var('learning_rate', 2e-4, summary=True)
    return tf.train.AdamOptimizer(lr, beta1=0.5, epsilon=1e-3)