Example #1
    def __init__(self,
                 env,
                 networks,
                 scope=None,
                 n_itr=5000,
                 start_itr=0,
                 batch_size=32,
                 max_path_length=200,
                 discount=0.99,
                 plot=False,
                 pause_for_plot=False,
                 center_adv=True,
                 max_epsilon=1,
                 min_epsilon=0.01,
                 store_paths=False,
                 whole_paths=True,
                 sampler_cls=None,
                 sampler_args=None,
                 force_batch_sampler=True,
                 pre_trained_size=10000,
                 target_network_update=1000,
                 save_param_update=125,
                 **kwargs):
        """
        :param env: Environment
        :param policy: Policy
        :type policy: Policy
        :param scope: Scope for identifying the algorithm. Must be specified if running multiple algorithms
        simultaneously, each using different environments and policies
        :param n_itr: Number of iterations.
        :param start_itr: Starting iteration.
        :param batch_size: Number of samples per iteration.
        :param max_path_length: Maximum length of a single rollout.
        :param discount: Discount.
        :param plot: Plot evaluation run after each iteration.
        :param pause_for_plot: Whether to pause before contiuing when plotting.
        :param store_paths: Whether to save all paths data to the snapshot.
        :return:
        """
        # Multi-agent mode is read from kwargs, defaulting to 'centralized'.
        self.ma_mode = kwargs.pop('ma_mode', 'centralized')
        self.env = env
        self.policy = networks['q_network']
        self.target_policy = networks['target_q_network']
        self.scope = scope
        self.n_itr = n_itr
        self.start_itr = start_itr
        self.batch_size = batch_size
        self.max_path_length = max_path_length
        self.discount = discount
        self.plot = plot
        self.max_epsilon = max_epsilon
        self.min_epsilon = min_epsilon
        self.pause_for_plot = pause_for_plot
        self.center_adv = center_adv
        self.store_paths = store_paths
        self.whole_paths = whole_paths
        self.force_batch_sampler = force_batch_sampler
        self.loss_after = 0
        self.mean_kl = 0
        self.max_kl = 0
        self.save_param_update = save_param_update
        self.target_network_update = target_network_update
        self.writer = None
        self.write_op = None
        self.pre_trained_size = pre_trained_size
        self.total_episodic_rewards = None
        self.s_loss = None
        self.s_avg_rewards = None
        self.s_total_rewards = None

        # Default to this project's experience-replay multi-agent sampler
        # when no sampler class is supplied.
        if sampler_cls is None:
            sampler_cls = ExpReplayMASampler
        if sampler_args is None:
            sampler_args = dict()
        self.sampler = sampler_cls(algo=self, **sampler_args)

        if plot:
            from rllab.plotter import plotter
            plotter.init_worker()

        self.init_opt()
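
A minimal instantiation sketch of the constructor above. The class name MADQN and the helpers make_env and build_q_network are hypothetical stand-ins (the excerpt does not show them), and train() is assumed to follow the usual rllab entry point:

env = make_env()                                   # hypothetical environment factory
networks = {
    'q_network': build_q_network(env),             # online network -> self.policy
    'target_q_network': build_q_network(env),      # separate copy -> self.target_policy
}
algo = MADQN(
    env=env,
    networks=networks,
    n_itr=5000,
    batch_size=32,
    max_path_length=200,
    discount=0.99,
    ma_mode='centralized',                         # consumed via kwargs.pop in __init__
)
algo.train()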
Example #2
File: BP.py Project: hl00/maml_rl
    def start_worker(self):
        self.sampler.start_worker()
        if self.plot:
            from rllab.plotter import plotter
            plotter.init_worker()
            plotter.init_plot(self.env, self.policy)
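
For context, start_worker is normally called once at the top of the training loop. A sketch of that loop, following the rllab BatchPolopt convention; the sampler calls (obtain_samples, process_samples) and shutdown_worker mirror rllab's API, while optimize_policy and update_target stand in for subclass-specific steps not shown in the excerpt:

    def train(self):
        self.start_worker()
        for itr in range(self.start_itr, self.n_itr):
            paths = self.sampler.obtain_samples(itr)            # collect rollouts
            samples_data = self.sampler.process_samples(itr, paths)
            self.optimize_policy(itr, samples_data)             # subclass-defined update
            if itr % self.target_network_update == 0:
                self.update_target()                            # sync target_q_network (assumed helper)
        self.shutdown_worker()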