Example #1
 def __init__(
         self,
         conv_filters, conv_filter_sizes, conv_strides, conv_pads,
         conv_nonlinearity=LN.rectify,
         hidden_sizes=[],
         hidden_nonlinearity=LN.tanh,
         output_pi_nonlinearity=LN.softmax,
         pixel_scale=255.,
         alternating_sampler=False,
         **kwargs
         ):
     """
     The policy consists of several convolution layers followed by recurrent
     layers and softmax
     :param env_spec: A spec for the mdp.
     :param conv_filters, conv_filter_sizes, conv_strides, conv_pads: specify the convolutional layers. See rllab.core.network.ConvNetwork for details.
     :param hidden_sizes: list of sizes for the fully connected hidden layers
     :param hidden_nonlinearity: nonlinearity used for each hidden layer
     :param prob_network: manually specified network for this policy, other network params
     are ignored
     :param feature_layer_index: index of the feature layer. Default -2 means the last layer before fc-softmax
     :param eps: mixture weight on uniform distribution; useful to force exploration
     :return:
     """
     save_args(vars())
     super().__init__(**kwargs)
Example #2
 def __init__(
     self,
     algo,
     policy,
     sampler,
     seed,
     affinities,
     rank,
     n_runners,
     use_gpu=True,
 ):
     save_args(vars(), underscore=False)
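All of these constructors delegate attribute bookkeeping to a save_args helper from the project these examples come from (the accel_rl repository shown in Example #9), called with vars() at the top of __init__. Its actual implementation is not reproduced here; the following is only a rough sketch of the behavior the calls above imply: store every constructor argument on self, optionally prefixed with an underscore, while skipping self, **kwargs, and the implicit __class__ cell. The default for underscore and the filtering details are assumptions.

def save_args(values, underscore=True):
    """Sketch of a save_args-style helper (assumed behavior, not the
    project's actual implementation).

    `values` should be vars() / locals() captured at the top of __init__,
    so it maps parameter names to the values that were passed in.
    """
    self = values["self"]
    prefix = "_" if underscore else ""
    for name, value in values.items():
        if name in ("self", "kwargs", "__class__"):
            continue  # skip the instance itself, **kwargs, and super()'s cell
        setattr(self, prefix + name, value)

With underscore=False (as in Example #2) each argument keeps its public name, e.g. self.sampler and self.affinities are available immediately after the call.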
Example #3
 def __init__(
     self,
     rank,
     envs,
     sync,
     segs_buf,
     step_buf,
     horizon,
     max_path_length,
     discount,
 ):
     save_args(vars(), underscore=False)
     self.n_envs = len(envs)
Example #4
 def __init__(
         self,
         conv_filters, conv_filter_sizes, conv_strides, conv_pads,
         hidden_sizes=[],
         hidden_nonlinearity=LN.rectify,
         pixel_scale=255.,
         epsilon=1,
         dueling=False,
         shared_last_bias=False,
         **kwargs
         ):
     save_args(vars())
     super().__init__(**kwargs)
Example #5
 def __init__(
     self,
     discount,
     gae_lambda,
     v_loss_coeff=1,
     ent_loss_coeff=0.01,
     standardize_adv=False,
     lr_schedule=None,
 ):
     if lr_schedule is not None and lr_schedule not in LR_SCHEDULES:
         raise ValueError("Unrecognized lr_schedule: {}, should be None "
                          "(for constant) or in: {}".format(
                              lr_schedule, LR_SCHEDULES))
     save_args(vars(), underscore=False)
     self.need_extra_obs = True  # (signal sent to the sampler)
Example #6
    def __init__(
            self,
            discount=0.99,
            batch_size=32,
            min_steps_learn=int(5e4),
            delta_clip=1,
            replay_size=int(1e6),
            training_intensity=8,  # avg number of training uses per datum
            target_update_steps=int(1e4),
            reward_horizon=1,
            OptimizerCls=None,
            optimizer_args=None,
            eps_greedy_args=None,
            double_dqn=False,
            dueling_dqn=False,  # Just a shortcut for optimizer args
            prioritized_replay=False,
            priority_args=None,
            ):
        save_args(vars(), underscore=False)

        opt_args, eps_args, pri_args = self._get_default_sub_args()

        if optimizer_args is not None:
            opt_args.update(optimizer_args)
        if OptimizerCls is None:
            OptimizerCls = DqnOptimizer
        self.optimizer = OptimizerCls(**opt_args)

        if eps_greedy_args is not None:
            eps_args.update(eps_greedy_args)
        self._eps_initial = eps_args["initial"]
        self._eps_final = eps_args["final"]
        self._eps_eval = eps_args["eval"]
        self._eps_anneal_steps = eps_args["anneal_steps"]

        if prioritized_replay:
            if priority_args is not None:
                pri_args.update(priority_args)
            self._priority_beta_initial = pri_args["beta_initial"]
            self._priority_beta_final = pri_args["beta_final"]
            self._priority_beta_anneal_steps = pri_args["beta_anneal_steps"]
            self._priority_args = dict(
                alpha=pri_args["alpha"],
                beta_initial=pri_args["beta_initial"],
                default_priority=pri_args["default_priority"],
            )

        self.need_extra_obs = False  # (for the sampler; should clean this up)
Example #7
 def __init__(
     self,
     learning_rate,
     update_method_name,
     update_method_args,
     grad_norm_clip=None,
     scale_conv_grads=False,
     n_update_chunks=3,
 ):
     assert update_method_name in ["rmsprop", "adam"]
     save_args(vars(), underscore=True)
     self.n_update_chunks = n_update_chunks
     if n_update_chunks == 1:
         self._push_update = self._single_lock_push
     else:
         self._push_update = self._cycle_locks_push
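Note the explicit self.n_update_chunks = n_update_chunks immediately after save_args(vars(), underscore=True). Assuming underscore=True stores the arguments with a leading underscore (as in the sketch after Example #2), that line simply re-exposes the one value other components read under its public name. A minimal illustration under that assumption, with a made-up class name:

class ChunkedOptimizer:
    def __init__(self, learning_rate, n_update_chunks=3):
        save_args(vars(), underscore=True)      # -> self._learning_rate, self._n_update_chunks
        self.n_update_chunks = n_update_chunks  # public copy for external readers

opt = ChunkedOptimizer(learning_rate=1e-3)
print(opt._learning_rate, opt.n_update_chunks)  # 0.001 3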
Example #8
 def __init__(
         self,
         conv_filters, conv_filter_sizes, conv_strides, conv_pads,
         hidden_sizes=[],
         hidden_nonlinearity=LN.rectify,
         pixel_scale=255.,
         epsilon=1,
         # shared_last_bias=False,
         factorized=True,
         common_noise=False,
         sigma_0=0.4,
         use_mu_init=True,
         **kwargs
         ):
     save_args(vars())
     super().__init__(**kwargs)
Example #9
File: base.py  Project: yaohuic/accel_rl
    def __init__(
        self,
        EnvCls,
        env_args,
        horizon,
        n_parallel=1,
        envs_per=1,
        max_path_length=np.inf,
        mid_batch_reset=True,
        max_decorrelation_steps=2000,
        profile_pathname=None,
    ):

        save_args(vars(), underscore=False)
        self.common_kwargs = vars(self).copy()
        self.common_kwargs.pop("n_parallel")
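Example #9 layers a second pattern on top of save_args: with underscore=False every constructor argument sits in vars(self) under its original name, so the instance __dict__ can be copied wholesale into a kwargs dictionary (here minus the master-only n_parallel), apparently for forwarding to per-process workers. A hedged sketch of that idea, where the class and the worker_kwargs helper are invented illustrations rather than the project's API:

class ParallelSamplerSketch:
    def __init__(self, EnvCls, env_args, horizon, n_parallel=1):
        save_args(vars(), underscore=False)
        self.common_kwargs = vars(self).copy()  # all saved args, by name
        self.common_kwargs.pop("n_parallel")    # master-only setting

    def worker_kwargs(self, rank):
        # Hypothetical helper: shared args plus a per-worker rank.
        return dict(self.common_kwargs, rank=rank)

sampler = ParallelSamplerSketch(EnvCls=dict, env_args={}, horizon=100, n_parallel=4)
print(sampler.worker_kwargs(rank=0))
# {'EnvCls': <class 'dict'>, 'env_args': {}, 'horizon': 100, 'rank': 0}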
Example #10
 def __init__(
     self,
     algo,
     policy,
     sampler,
     n_steps,
     seed=None,
     affinities=None,
     use_gpu=True,
 ):
     n_steps = int(n_steps)
     save_args(vars(), underscore=False)
     if affinities is None:
         self.affinities = dict()
     if algo.optimizer.parallelism_tag != self.parallelism_tag:
         raise TypeError("Had mismatched parallelism between Runner ({}) "
                         "and Optimizer: {}".format(
                             self.parallelism_tag,
                             algo.optimizer.parallelism_tag))